knitr::opts_chunk$set(dev = "svg")

library(tidyverse)
library(data.table)
library(table1)
library(ggpubr)
library(ggthemr)
library(ggsci)
library(cowplot)
library(ggplot2)
library(geepack)
library(sjPlot)
library(readxl)

source("/Users/irenaeuschan/Documents/Irenaeus/CDK46_CH/utils/toolbox.R")
source("/Users/irenaeuschan/Documents/Irenaeus/CDK46_CH/utils/timepoints_function.R")
source("/Users/irenaeuschan/Documents/Irenaeus/CDK46_CH/utils/custom_lollipop.R")

data_file_path <- "/Users/irenaeuschan/Documents/Irenaeus/CDK46_CH/data/"
figures_save_path <- "/Users/irenaeuschan/Documents/Irenaeus/CDK46_CH/figures/"
# CH Calls
sclc_df <- fread(paste0(data_file_path, "g1_sclc_df.minimum.csv"))                       # Small Cell Lung Cancer
tnbc_df <- fread(paste0(data_file_path, "g1_tnbc_df.minimum.csv"))                       # Triple Negative Breast Cancer
crc_df <- fread(paste0(data_file_path, "g1_crc_df.minimum.csv"))                         # Colorectal Cancer
untreated_df <- fread(paste0(data_file_path, "untreated_df.minimum.csv"))                # Untreated

g1_df <- rbind(
    sclc_df %>% mutate(Cohort = "SCLC") %>% mutate(DeID = as.character(DeID)), 
    tnbc_df %>% mutate(Cohort = "TNBC") %>% mutate(DeID = as.character(DeID), SampleDeID = as.character(SampleDeID)), 
    crc_df %>% mutate(Cohort = "CRC"), 
    untreated_df %>% mutate(Cohort = "Untreated") %>% mutate(SampleDeID = as.character(SampleDeID))
)

sclc_schematic <- paste0(data_file_path, "G1_SCLC.png")
tnbc_schematic <- paste0(data_file_path, "G1_TNBC.png")
crc_schematic <- paste0(data_file_path, "G1_CRC.png")

# Timepoints Dataframe
sclc_tp <- fread(paste0(data_file_path, "sclc_timepoints.minimum.csv"))                  # Small Cell Lung Cancer
tnbc_tp <- fread(paste0(data_file_path, "tnbc_timepoints.minimum.csv"))                  # Triple Negative Breast Cancer
crc_tp <- fread(paste0(data_file_path, "crc_timepoints.minimum.csv"))                    # Colorectal Cancer

g1_tp <- rbind(
    sclc_tp %>% mutate(Cohort = if_else(Trilaciclib == "Untreated", "Untreated", "SCLC")), 
    tnbc_tp %>% mutate(Cohort = "TNBC"), 
    crc_tp %>% mutate(Cohort = "CRC")
) %>%
mutate(
    prepreVAF = ifelse(is.na(prepreVAF), 0, prepreVAF),
    preVAF = ifelse(is.na(preVAF), 0, preVAF),
    duringVAF = ifelse(is.na(duringVAF), 0, duringVAF),
    postVAF = ifelse(is.na(postVAF), 0, postVAF),
    postpostVAF = ifelse(is.na(postpostVAF), 0, postpostVAF)
) %>%
filter(ifelse(Cohort == "Untreated", change_in_days < 350, TRUE))   # To keep the Untreated to be similar to the time frame of the treated

# Demographics Dataframe
g1_demo <- fread(paste0(data_file_path, "g1_demo.csv"))
g1_extra_demo <- fread(paste0(data_file_path, "g1_sra_extra_columns.csv"))

# Blood Counts Lab Results
g1_lab <- fread(paste0(data_file_path, "g1_labresults.csv"))
# g1_demo %>%
#     left_join(
#         g1_extra_demo,
#         by=c("DeID"="DEID", "SampleDeID")
#     ) %>%
#     mutate(
#         SampleCollectionDate = case_when(
#             !is.na(SampleCollectionDate) ~ as.Date(SampleCollectionDate),
#             SampleDeID == "AP187910G01" ~ as.Date("2022-02-08"),
#         ),
#         sample_name = paste0(DeID, "_", SampleDeID),
#         sample_title = paste0(DeID, "_", SampleDeID, "_", Cohort, "_", Trilaciclib),
#         organism = "Homo sapien",
#         isolate = paste0(Cohort, " ", Trilaciclib, " ", TimePoint, " ", whichdraw),
#         age = ceiling(Age),
#         biomaterial_provider = "Dr. Kelly Bolton",
#         collection_date = SampleCollectionDate,
#         geo_loc_name = "United States of America",
#         sex = Sex,
#         tissue = "Blood"
#     ) %>%
#     dplyr::select(
#         sample_name, sample_title, organism, isolate, age, biomaterial_provider, collection_date, geo_loc_name, sex, tissue
#     )
    #fwrite("SRA_Upload_BioSample_Attributes.tsv", sep = "\t")

1 Table 1

label(g1_demo$Age) <- "Age at Treatment Start"
label(g1_demo$Sex) <- "Gender"
label(g1_demo$Race) <- "Race"
label(g1_demo$SmokingStatus) <- "Smoking Status"

for_table <- g1_demo %>% 
    filter(ifelse(Cohort == "TNBC", whichdraw == "PreTx" | whichdraw == "MidTx", whichdraw == "PreTx" | whichdraw == "PostTx")) %>%
    filter(DeID %in% c(g1_demo %>% 
                        filter(ifelse(Cohort == "TNBC", whichdraw == "PreTx" | whichdraw == "MidTx", whichdraw == "PreTx" | whichdraw == "PostTx")) %>%
                        group_by(Cohort, DeID) %>%
                        summarise(n = n()) %>%
                        filter(n > 1) %>%
                        pull(DeID))
    )
## `summarise()` has grouped output by 'Cohort'. You can override using the
## `.groups` argument.
table1(~ Age + Sex + Race + Ethnicity + SmokingStatus | Cohort + Trilaciclib, data = for_table %>% filter(whichdraw == "PreTx"))
CRC
SCLC
TNBC
Untreated
Overall
Placebo
(N=65)
Trilaciclib
(N=60)
Placebo
(N=37)
Trilaciclib
(N=28)
Placebo
(N=11)
Trilaciclib
(N=23)
Untreated
(N=176)
Placebo
(N=113)
Trilaciclib
(N=111)
Untreated
(N=176)
Age at Treatment Start
Mean (SD) 54.7 (11.5) 55.2 (11.6) 64.9 (8.14) 63.8 (8.33) 60.8 (14.2) 56.3 (11.1) 65.2 (10.8) 58.6 (11.7) 57.6 (11.3) 65.2 (10.8)
Median [Min, Max] 55.0 [30.0, 79.0] 55.5 [28.0, 74.0] 66.0 [39.0, 83.0] 65.5 [45.0, 78.0] 57.0 [41.0, 86.0] 55.0 [34.0, 74.0] 66.8 [29.9, 87.9] 59.0 [30.0, 86.0] 60.0 [28.0, 78.0] 66.8 [29.9, 87.9]
Gender
Female 26 (40.0%) 21 (35.0%) 13 (35.1%) 12 (42.9%) 11 (100%) 23 (100%) 91 (51.7%) 50 (44.2%) 56 (50.5%) 91 (51.7%)
Male 39 (60.0%) 39 (65.0%) 24 (64.9%) 16 (57.1%) 0 (0%) 0 (0%) 85 (48.3%) 63 (55.8%) 55 (49.5%) 85 (48.3%)
Race
Asian 3 (4.6%) 3 (5.0%) 1 (2.7%) 0 (0%) 0 (0%) 1 (4.3%) 2 (1.1%) 4 (3.5%) 4 (3.6%) 2 (1.1%)
Black 5 (7.7%) 2 (3.3%) 0 (0%) 0 (0%) 3 (27.3%) 1 (4.3%) 27 (15.3%) 8 (7.1%) 3 (2.7%) 27 (15.3%)
Unknown 3 (4.6%) 5 (8.3%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 2 (1.1%) 3 (2.7%) 5 (4.5%) 2 (1.1%)
White 54 (83.1%) 48 (80.0%) 35 (94.6%) 28 (100%) 8 (72.7%) 20 (87.0%) 145 (82.4%) 97 (85.8%) 96 (86.5%) 145 (82.4%)
Other 0 (0%) 2 (3.3%) 0 (0%) 0 (0%) 0 (0%) 1 (4.3%) 0 (0%) 0 (0%) 3 (2.7%) 0 (0%)
American Indian or Alaska Native 0 (0%) 0 (0%) 1 (2.7%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 1 (0.9%) 0 (0%) 0 (0%)
Ethnicity
Hispanic or Latino 7 (10.8%) 7 (11.7%) 1 (2.7%) 0 (0%) 0 (0%) 2 (8.7%) 0 (0%) 8 (7.1%) 9 (8.1%) 0 (0%)
Not Hispanic or Latino 55 (84.6%) 48 (80.0%) 35 (94.6%) 28 (100%) 11 (100%) 21 (91.3%) 170 (96.6%) 101 (89.4%) 97 (87.4%) 170 (96.6%)
Unknown 3 (4.6%) 5 (8.3%) 1 (2.7%) 0 (0%) 0 (0%) 0 (0%) 6 (3.4%) 4 (3.5%) 5 (4.5%) 6 (3.4%)
Smoking Status
Unknown 65 (100%) 60 (100%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 16 (9.1%) 65 (57.5%) 60 (54.1%) 16 (9.1%)
Current 0 (0%) 0 (0%) 10 (27.0%) 10 (35.7%) 1 (9.1%) 0 (0%) 9 (5.1%) 11 (9.7%) 10 (9.0%) 9 (5.1%)
Former 0 (0%) 0 (0%) 26 (70.3%) 18 (64.3%) 2 (18.2%) 6 (26.1%) 0 (0%) 28 (24.8%) 24 (21.6%) 0 (0%)
Never 0 (0%) 0 (0%) 1 (2.7%) 0 (0%) 8 (72.7%) 17 (73.9%) 151 (85.8%) 9 (8.0%) 17 (15.3%) 151 (85.8%)

2 Plotting Variables

panel_theme_basic <- theme_bw() + theme(
  panel.border = element_blank(),
  legend.title = element_blank(),
  legend.key.size = unit(5, "mm"),
  legend.text = element_text(size = 22),
  legend.position = "top",
  legend.direction = "horizontal",
  plot.subtitle = element_text(hjust = 0.5, size = 24),
  plot.title = element_text(face = "bold", hjust = 0, vjust = -2, size = 24),
  panel.grid.major = element_blank(),
  strip.background = element_blank(),
  strip.text = element_text(size = 22),
  axis.text.y = element_text(size = 22),
  axis.text.x = element_text(size = 22),
  axis.title = element_text(size = 24),
  axis.line = element_line(colour = "black"),
  plot.margin = margin(1, 1, 1, 1, "cm")
)

default_sig <- function(y_position, xmin, xmax, model_pvalue, model_stars, tip_length = 0.03, size = 1, textsize = 10, color = "black", show_values = TRUE, hide_ns = TRUE) {
  if (show_values){
    list(y_position = y_position, xmin = c(xmin), xmax = c(xmax), annotation = paste0("p = ", format(model_pvalue, digits = 5), model_stars), tip_length = tip_length, size = size, textsize = textsize, color = color)
  } else {
    if (model_stars == "") { model_stars <- "ns"}
    if (hide_ns & model_stars == "ns") { 
      list(y_position = y_position, xmin = c(xmin), xmax = c(xmax), annotation = "", tip_length = 0, size = 0, textsize = 0, color = "black")
    } else {
      list(y_position = y_position, xmin = c(xmin), xmax = c(xmax), annotation = model_stars, tip_length = tip_length, size = size, textsize = textsize, color = color)
    }
  }
}

default_sig_data <- function(data, y_position, xmin, xmax, model_pvalue, model_stars, tip_length = 0.03, size = 1, textsize = 10, color = "black", fill = "black", show_values = TRUE, hide_ns = TRUE) {
  if (show_values) {
    list(data = data, y_position = y_position, xmin = c(xmin), xmax = c(xmax), annotation = paste0("p = ", format(model_pvalue, digits = 5), model_stars), tip_length = tip_length, size = size, textsize = textsize, color = color, fill = fill)
  } else {
    if (model_stars == "") { model_stars <- "ns"}
    if (hide_ns & model_stars == "ns") { 
      list(data = data, y_position = y_position, xmin = c(xmin), xmax = c(xmax), annotation = "", tip_length = 0, size = 0, textsize = 0, color = "black", fill = "black")
    } else {
      list(data = data, y_position = y_position, xmin = c(xmin), xmax = c(xmax), annotation = model_stars, tip_length = tip_length, size = size, textsize = textsize, color = color, fill = fill)
    }
  }
}

add_significance_annotation <- function(p, label, pos, start, end, size_text = 10, size_line = 1) {
    p +
      annotate("text", x = (pos + 0.1), y = (start + end)/2, label = label, size = size_text) +
      annotate("segment", x = pos, xend = pos, y = start, yend = end, size = size_line) +
      annotate("segment", x = pos - 0.05, xend = pos + 0.008, y = start, yend = start, size = size_line) +
      annotate("segment", x = pos - 0.05, xend = pos + 0.008, y = end, yend = end, size = size_line)
}

signif.num <- function(x, ns = FALSE) {
  if (ns) {
    symbols = c("***", "**", "*", "ns")
  } else {
    symbols = c("***", "**", "*", "")
  }
  
  symnum(unlist(x), corr = FALSE, na = FALSE, legend = T,
         cutpoints = c(0, 0.001, 0.01, 0.05, 1), 
         symbols = symbols)
}

3 Default Variables

gene_list = c("DNMT3A", "TET2", "ASXL1", "TP53", "PPM1D", "CHEK2", "JAK2", "SRSF2", "SF3B1")
DDR_genes = c("TP53", "PPM1D", "CHEK2")
treatment_colors = c('Untreated' = '#C0C0C0', 'Placebo' = '#7AD3F7', 'Trilaciclib' = '#F37B79')
treatment_colors = c('Untreated' = '#BC3C29FF', 'Placebo' = '#E18727FF', 'Trilaciclib' = '#20854EFF')

4 Basic Processing - Merging ALL Information Together

g1_tp <- g1_tp %>%
    left_join(
        g1_df %>%
            dplyr::select(
                key, Gene,
                VariantClass, gene_aachange, oncoKB, oncoKB_reviewed,
                n.loci.vep, source.totals.loci, n.loci.truncating.vep, source.totals.loci.truncating, n.HGVSp, source.totals.p, n.HGVSc, source.totals.c, CosmicCount, heme_cosmic_count, myeloid_cosmic_count
            ) %>%
            rowwise() %>%
            mutate(non_na_count = sum(across(everything(), ~ !is.na(.)))) %>%
            ungroup() %>%
            group_by(key, Gene) %>%
            slice_max(order_by = non_na_count, with_ties = FALSE) %>%
            ungroup() %>%
            dplyr::select(-non_na_count),
        by = c("key", "Gene")
    )

g1_tp <- g1_tp %>%
    left_join(
        g1_demo %>% 
            filter(whichdraw == "PreTx") %>%
            dplyr::select(DeID, Age, Sex, Race, Ethnicity, SmokingStatus)
    )

g1_lab_changes <- g1_lab %>% 
            pivot_wider(
                id_cols = c(Cohort, subject),
                names_from = c(Testcode, whichdraw),
                values_from = Labvalue
            ) %>%
            mutate(
                perc_BASO_of_WBC_PreTx = BASO_PreTx / WBC_PreTx,
                perc_EOS_of_WBC_PreTx = EOS_PreTx / WBC_PreTx,
                perc_LYM_of_WBC_PreTx = LYM_PreTx / WBC_PreTx,
                perc_MONO_of_WBC_PreTx = MONO_PreTx / WBC_PreTx,
                perc_NEUT_of_WBC_PreTx = NEUT_PreTx / WBC_PreTx,

                perc_BASO_of_WBC_PostTx = BASO_PostTx / WBC_PostTx,
                perc_EOS_of_WBC_PostTx = EOS_PostTx / WBC_PostTx,
                perc_LYM_of_WBC_PostTx = LYM_PostTx / WBC_PostTx,
                perc_MONO_of_WBC_PostTx = MONO_PostTx / WBC_PostTx,
                perc_NEUT_of_WBC_PostTx = NEUT_PostTx / WBC_PostTx,

                perc_BASO_of_WBC_MidTx = BASO_MidTx / WBC_MidTx,
                perc_EOS_of_WBC_MidTx = EOS_MidTx / WBC_MidTx,
                perc_LYM_of_WBC_MidTx = LYM_MidTx / WBC_MidTx,
                perc_MONO_of_WBC_MidTx = MONO_MidTx / WBC_MidTx,
                perc_NEUT_of_WBC_MidTx = NEUT_MidTx / WBC_MidTx,

                change_in_prop_BASO = case_when(
                    Cohort == "SCLC" ~ perc_BASO_of_WBC_PostTx - perc_BASO_of_WBC_PreTx,
                    Cohort == "TNBC" ~ perc_BASO_of_WBC_MidTx - perc_BASO_of_WBC_PreTx,
                    Cohort == "CRC" ~ perc_BASO_of_WBC_PostTx - perc_BASO_of_WBC_PreTx
                ),
                change_in_prop_EOS = case_when(
                    Cohort == "SCLC" ~ perc_EOS_of_WBC_PostTx - perc_EOS_of_WBC_PreTx,
                    Cohort == "TNBC" ~ perc_EOS_of_WBC_MidTx - perc_EOS_of_WBC_PreTx,
                    Cohort == "CRC" ~ perc_EOS_of_WBC_PostTx - perc_EOS_of_WBC_PreTx
                ),
                change_in_prop_NEUT = case_when(
                    Cohort == "SCLC" ~ perc_NEUT_of_WBC_PostTx - perc_NEUT_of_WBC_PreTx,
                    Cohort == "TNBC" ~ perc_NEUT_of_WBC_MidTx - perc_NEUT_of_WBC_PreTx,
                    Cohort == "CRC" ~ perc_NEUT_of_WBC_PostTx - perc_NEUT_of_WBC_PreTx,
                ),
                change_in_prop_LYM = case_when(
                    Cohort == "SCLC" ~ perc_LYM_of_WBC_PostTx - perc_LYM_of_WBC_PreTx,
                    Cohort == "TNBC" ~ perc_LYM_of_WBC_MidTx - perc_LYM_of_WBC_PreTx,
                    Cohort == "CRC" ~ perc_LYM_of_WBC_PostTx - perc_LYM_of_WBC_PreTx
                ),
                change_in_prop_MONO = case_when(
                    Cohort == "SCLC" ~ perc_MONO_of_WBC_PostTx - perc_MONO_of_WBC_PreTx,
                    Cohort == "TNBC" ~ perc_MONO_of_WBC_MidTx - perc_MONO_of_WBC_PreTx,
                    Cohort == "CRC" ~ perc_MONO_of_WBC_PostTx - perc_MONO_of_WBC_PreTx
                ),
                change_in_prop_GRAN = rowSums(across(c(change_in_prop_BASO, change_in_prop_EOS, change_in_prop_NEUT)), na.rm = TRUE)
            )    

g1_tp <- g1_tp %>%
    left_join(
        g1_lab_changes %>%
        dplyr::select(Cohort, subject, 
            change_in_prop_BASO, change_in_prop_EOS, change_in_prop_LYM, change_in_prop_MONO, change_in_prop_NEUT, change_in_prop_GRAN
        ),
        by = c("DeID"="subject", "Cohort")
    )

# Simplifying Race, Ethnicity Variables
g1_tp <- g1_tp %>%
    mutate(
        Race = case_when(
            Race == "White" ~ "White",
            Race == "Unknown" ~ "White",
            TRUE ~ "Non-White"
        ),
        Race = case_when(
            Race == "White" & Ethnicity == "Hispanic or Latino" ~ "Non-White",
            Race == "White" & Ethnicity == "Not Hispanic or Latino" ~ "White",
            Race == "White" & Ethnicity == "Unknown" ~ "White",
            Race == "Non-White" & Ethnicity == "Hispanic or Latino" ~ "Non-White",
            Race == "Non-White" & Ethnicity == "Not Hispanic or Latino" ~ "Non-White",
            Race == "Non-White" & Ethnicity == "Unknown" ~ "Non-White"
        )
    )

5 Reference Variables

g1_tp$Trilaciclib <- factor(g1_tp$Trilaciclib, levels = c("Untreated", "Trilaciclib", "Placebo"))
g1_tp$Gene_Class <- factor(g1_tp$Gene_Class, levels = c("DTA", "DDR"))
g1_tp$Gene <- factor(g1_tp$Gene, levels = c("TP53", "PPM1D", "CHEK2", "DNMT3A", "TET2", "ASXL1", "SF3B1", "SRSF2", "JAK2"))
g1_tp$Cohort <- factor(g1_tp$Cohort, levels = c("Untreated", "SCLC", "TNBC", "CRC"))
g1_tp$Sex <- factor(g1_tp$Sex, levels = c("Male", "Female"))
#g1_tp$Race <- factor(g1_tp$Race, levels = c("White", "Black", "Asian", "American Indian or Alaska Native", "Other", "Unknown"))
g1_tp$Race <- factor(g1_tp$Race, levels = c("White", "Non-White"))
#g1_tp$Ethnicity <- factor(g1_tp$Ethnicity, levels = c("Hispanic or Latino", "Not Hispanic or Latino", "Unknown"))
g1_tp$SmokingStatus <- factor(g1_tp$SmokingStatus, levels = c("Never", "Former", "Current", "Unknown"))

6 Mutation Table - SCLC and Untreated ONLY

num_mutations_per_person <- g1_df %>%
    filter(Cohort == "SCLC" | Cohort == "Untreated") %>%
    filter(average_AF >= 0.002) %>%                             # Anything less than this may be noise.
    group_by(DeID, Gene, Trilaciclib, whichdraw) %>%
    summarise(n = sum(putative_driver)) %>%
    #mutate(Gene = ifelse(Gene == "", "No Mutation", Gene)) %>%
    pivot_wider(names_from = Gene, values_from = n, values_fill = 0) %>%
    ungroup() #%>%
    # mutate(
    #     DDR_Mutations = rowSums(.[, c("PPM1D", "TP53", "CHEK2")], na.rm = TRUE),
    # )

g1_mut_table <- g1_demo %>% 
    filter(Cohort == "SCLC" | Cohort == "Untreated") %>%
    dplyr::select(DeID, Trilaciclib, Age, whichdraw) %>% 
    left_join(num_mutations_per_person) %>%
    group_by(DeID, Trilaciclib, Age, whichdraw) %>%
    summarise_if(is.numeric, function(x) sum(x != 0, na.rm = TRUE))

g1_mut_table %>%
    filter(whichdraw == "PreTx") %>%
    mutate(
        CH_Binary = ifelse(rowSums(across(where(is.integer)), na.rm = TRUE) > 0, 1, 0)
    ) %>%
    group_by(Trilaciclib) %>%
    summarise(n_ch = sum(CH_Binary), n_total = n())
Trilaciclib n_ch n_total
Placebo 26 37
Trilaciclib 20 28
Untreated 127 176

7 Mutation Table - TNBC and CRC

num_mutations_per_person_tnbc <- g1_df %>%
    filter(Cohort == "TNBC" | Cohort == "Untreated") %>%
    filter(average_AF >= 0.002) %>%
    group_by(DeID, Gene, Trilaciclib, whichdraw) %>%
    summarise(n = sum(putative_driver)) %>%
    pivot_wider(names_from = Gene, values_from = n, values_fill = 0) %>%
    ungroup() 

g1_mut_table_tnbc <- g1_demo %>% 
    filter(Cohort == "TNBC" | Cohort == "Untreated") %>%
    dplyr::select(DeID, Trilaciclib, Age, whichdraw) %>% 
    left_join(num_mutations_per_person_tnbc) %>%
    group_by(DeID, Trilaciclib, Age, whichdraw) %>%
    summarise_if(is.numeric, function(x) sum(x != 0, na.rm = TRUE))

num_mutations_per_person_crc <- g1_df %>%
    filter(Cohort == "CRC" | Cohort == "Untreated") %>%
    filter(average_AF >= 0.002) %>%
    group_by(DeID, Gene, Trilaciclib, whichdraw) %>%
    summarise(n = sum(putative_driver)) %>%
    pivot_wider(names_from = Gene, values_from = n, values_fill = 0) %>%
    ungroup() 

g1_mut_table_crc <- g1_demo %>% 
    filter(Cohort == "CRC" | Cohort == "Untreated") %>%
    dplyr::select(DeID, Trilaciclib, Age, whichdraw) %>% 
    left_join(num_mutations_per_person_crc) %>%
    group_by(DeID, Trilaciclib, Age, whichdraw) %>%
    summarise_if(is.numeric, function(x) sum(x != 0, na.rm = TRUE))

8 Figure 1a - SCLC Schematic

fig1a <- ggdraw() + draw_image(sclc_schematic)
fig1a

9 Figure 1B - Ribbon Plot

fig1b <- g1_mut_table %>%
    filter(whichdraw == "PreTx", Age >= 45, Age <= 85) %>%
    mutate(
        CH_Binary = ifelse(rowSums(across(where(is.integer)), na.rm = TRUE) > 0, 1, 0),
        Treatment = ifelse(Trilaciclib == "Untreated", "Healthy Control", "SCLC Pre-Treatment"),
        Treatment = factor(Treatment, levels = c("Healthy Control", "SCLC Pre-Treatment"))
    ) %>%
    ggplot(
        aes(
            x = Age,
            y = CH_Binary,
            fill = Treatment
        )
    ) +
    geom_smooth(
        aes(
            fill = Treatment,
            color = Treatment
        ),
        method = "gam",
        formula = y ~ s(x),
        method.args = list(family = "binomial"),
        size = 1.5,
        se = TRUE,
        alpha = 0.1
    ) +
    scale_y_continuous(labels = scales::label_percent(accuracy = 1L), expand = expansion(add = c(0.01, 0.01))) +
    scale_x_continuous(breaks = seq(0, 90, by = 5)) + 
    labs(x = "Age", y = "Frequency (%)") +
    panel_theme_basic +
    scale_fill_nejm() +
    scale_color_nejm() +
    #scale_fill_manual(values = treatment_colors) +
    #scale_color_manual(values = treatment_colors) +
    theme(
        plot.margin = margin(0, 1, 1, 1, "cm")
    )
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
fig1b

ggsave(paste0(figures_save_path, "Figure1B.png"), plot = fig1b, width = 14, height = 8, dpi = 300)

10 Figure 1C - Waterfall Plot

# g1_tp %>% filter(Cohort == "SCLC" | Cohort == "Untreated") %>%
#     filter(
#         all_time_points == TRUE |
#         (all_time_points == FALSE & called == TRUE) |
#         (all_time_points == FALSE & called == FALSE & preVAF >= 0.001)
#     )
    
df_to_maf <- g1_df %>% filter(Cohort == "SCLC" | Cohort == "Untreated") %>%
    filter(VariantClass != "synonymous_variant", putative_driver == 1, whichdraw == "PreTx") %>%
    tidyr::separate(key, into= c("Chromosome", "Start_Position", "Reference_Allele", "Tumor_Seq_Allele2"), sep = ":") %>%
    mutate(
        Tumor_Sample_Barcode = DeID,
        Hugo_Symbol = Gene,
        Variant_Classification = VariantClass,
        Start_Position = as.numeric(Start_Position),
        Variant_Type = case_when(
            nchar(Reference_Allele) == nchar(Tumor_Seq_Allele2) ~ "SNP",
            nchar(Reference_Allele) < nchar(Tumor_Seq_Allele2) ~ "INS",
            nchar(Reference_Allele) > nchar(Tumor_Seq_Allele2) ~ "DEL"
        ),
        End_Position = case_when(
            Variant_Type == "SNP" ~ Start_Position,
            Variant_Type == "INS" ~ Start_Position,
            Variant_Type == "DEL" ~ (Start_Position + nchar(Reference_Allele) - 1)
        ),
        stringsAsFactors = FALSE
    )
clinical_data <- g1_df %>%
    mutate(
        Tumor_Sample_Barcode = DeID,
        Cohort = ifelse(Cohort == "Untreated", "Healthy Control", "SCLC"),
        Cohort = factor(Cohort, levels = c("SCLC", "Healthy Control")),
        Treatment = ifelse(Trilaciclib == "Untreated", "Healthy Control", Trilaciclib),
        Treatment = factor(Trilaciclib, levels = c("Untreated", "Placebo", "Trilaciclib")),
        stringAsFactors = FALSE
    )
maf_to_plot <- maftools::read.maf(maf = df_to_maf, vc_nonSyn = names(table(df_to_maf$VariantClass)), clinicalData = clinical_data)
## -Validating
## --Non MAF specific values in Variant_Classification column:
##   missense_variant
##   frameshift_variant
##   stop_gained
##   splice_region_variant
##   inframe_insertion
##   inframe_deletion
## -Summarizing
## -Processing clinical data
## -Finished in 0.149s elapsed (0.129s cpu)
maftools::oncoplot(maf = maf_to_plot, genes = gene_list, clinicalFeatures = c("Cohort", "Treatment"))

11 Figure 1D - VAF Bin Mutation Bar Plot

df_to_plot <- g1_df %>% filter(Cohort == "SCLC" | Cohort == "Untreated") %>%
    filter(whichdraw == "PreTx") %>%
    mutate(
        vaf_bin = case_when(
            average_AF >= 0.002 & average_AF < 0.01 ~ "0.2-1% VAF", 
            average_AF >= 0.01 & average_AF < 0.05 ~ "1-5% VAF",
            average_AF >= 0.05 ~ ">=5% VAF",
            average_AF >= 0.001 & average_AF < 0.002 ~ "0.1-0.2% VAF"
        )
    ) %>%
    filter(vaf_bin != "<0.2% VAF") %>%
    group_by(Gene, vaf_bin) %>%
    summarise(n = n()) %>%
    group_by(Gene) %>%
    mutate(
        total = sum(n),
        percentage = (n / total) * 100,
        #vaf_bin = factor(vaf_bin, levels = c("0.1-0.2% VAF", "0.2-1% VAF", "1-5% VAF", ">=5% VAF")),
        vaf_bin = factor(vaf_bin, levels = c(">=5% VAF", "1-5% VAF", "0.2-1% VAF", "0.1-0.2% VAF")),
        Gene = factor(Gene, levels = c(gene_list))
    )
## `summarise()` has grouped output by 'Gene'. You can override using the
## `.groups` argument.
fig1d <- df_to_plot %>%
    ggplot(
        aes(
            x = Gene,
            y = percentage,
            fill = vaf_bin
        )
    ) +
    geom_bar(stat = "identity") +
    labs(
        x = "Gene",
        y = "Percentage of Mutations at Baseline",
        fill = "VAF Bin"
    ) +
    panel_theme_basic +
    theme(
        axis.text.x = element_text(angle = 45, hjust = 1),
        #axis.text.y = element_text(angle = 90, hjust = 1)
        plot.margin = margin(2, 1, 1, 1, "cm")
    ) +
    scale_fill_nejm()
fig1d

ggsave(paste0(figures_save_path, "Figure1D.png"), plot = fig1d, width = 10, height = 8, dpi = 300)

12 Figure 1E - Proportion of Patients with CH Mutations

model_treatment_gene <- map_dfr(c(gene_list), function(gene){
    rbind(
        glm(
            formula = as.formula(paste(gene, "~ Trilaciclib + Age")),
            family = binomial(link = "logit"),
            data = g1_mut_table %>%
                filter(whichdraw == "PostTx") %>%
                mutate(Trilaciclib = factor(Trilaciclib, levels = c("Untreated", "Placebo", "Trilaciclib")))
        ) %>%
        sjPlot::get_model_data(type = "est") %>%
        mutate(Gene = gene),
        glm(
            formula = as.formula(paste(gene, "~ Trilaciclib + Age")),
            family = binomial(link = "logit"),
            data = g1_mut_table %>%
                filter(whichdraw == "PostTx") %>%
                filter(Trilaciclib != "Untreated") %>%
                mutate(Trilaciclib = factor(Trilaciclib, levels = c("Trilaciclib", "Placebo")))
        ) %>%
        sjPlot::get_model_data(type = "est") %>%
        mutate(Gene = gene)
    )
})
model_treatment_gene <- model_treatment_gene %>% filter(term != "Age")

g1_mut_table <- g1_mut_table %>%
    mutate(
        DTA_Mutations = ifelse(rowSums(across(c("DNMT3A", "TET2", "ASXL1")), na.rm = TRUE) > 0, 1, 0),
        DDR_Mutations = ifelse(rowSums(across(c("PPM1D", "TP53", "CHEK2")), na.rm = TRUE) > 0, 1, 0),
        Treatment = ifelse(Trilaciclib == "Untreated", "Healthy Control", "SCLC Post-Treatment"),
        Treatment = factor(Treatment, levels = c("Healthy Control", "SCLC Post-Treatment"))
    )

g1_mut_table %>%
    filter(whichdraw == "PostTx") %>%
    group_by(Trilaciclib) %>%
    summarise(n_ch = sum(DDR_Mutations), n_total = n())
Trilaciclib n_ch n_total
Placebo 21 37
Trilaciclib 8 28
Untreated 42 178
glm(
    formula = as.formula("DDR_Mutations ~ Treatment + Age"),
    family = binomial(link = "logit"),
    data = g1_mut_table %>%
        filter(whichdraw == "PostTx")
) %>% sjPlot::get_model_data(type = "est")
term estimate std.error conf.level conf.low conf.high statistic df.error p.value p.stars p.label group xpos xmin xmax
TreatmentSCLC Post-Treatment 3.405394 0.3304140 0.95 1.791850 6.574561 3.708561 Inf 0.0002084 *** 3.41 *** pos 2 1.825 2.175
Age 1.078908 0.0182065 0.95 1.042594 1.119981 4.171552 Inf 0.0000303 *** 1.08 *** pos 1 0.825 1.175
fig1e <- g1_mut_table %>%
    filter(whichdraw == "PostTx") %>%
    group_by(Treatment) %>%
    dplyr::select(-Age, -Trilaciclib, -whichdraw, -DTA_Mutations, -DDR_Mutations) %>%
    summarise_if(is.numeric, sum, na.rm = TRUE) %>%
    pivot_longer(cols = -c(Treatment), names_to = "Gene", values_to = "n") %>%
    group_by(Treatment, Gene) %>%
    left_join(
        g1_mut_table %>%
            filter(whichdraw == "PostTx") %>%
            group_by(Treatment) %>%
            summarise(total_patients = n())
    ) %>%
    mutate(
        Proportion = n / total_patients,
        Gene = factor(Gene, levels = c(gene_list)),
        Treatment = factor(Treatment, levels = c("Healthy Control", "SCLC Post-Treatment"))
        #Trilaciclib = factor(Trilaciclib, levels = c("Untreated", "Placebo", "Trilaciclib"))
    ) %>%
    ggplot(
        aes(
        x = Gene,
        y = Proportion,
        fill = Treatment
        )
    ) +
    geom_bar(color = "black", stat = "identity", position = position_dodge()) +
    labs(
        x = "",
        y = "Percent of Patients\nwith Mutated Gene\n Post-Treatment",
        fill = "Treatment"
    ) +
    panel_theme_basic +
    theme(
        axis.text.x = element_text(angle = 45, hjust = 1),
        legend.position = c(0.7, 0.8),
        legend.direction = "vertical",
        plot.margin = margin(2, 0, 1, 1, "cm")
    ) +
    scale_y_continuous(labels = scales::percent, limits = c(NA, 0.6), expand = c(0, 0)) +
    scale_fill_nejm() +
    scale_color_nejm()
## Joining with `by = join_by(Treatment)`
## Warning: A numeric `legend.position` argument in `theme()` was deprecated in ggplot2
## 3.5.0.
## ℹ Please use the `legend.position.inside` argument of `theme()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
    #scale_fill_manual(values = treatment_colors)
    # do.call(geom_signif, default_sig(0.6, 0.75, 1, model_treatment_gene$p.value[1], model_treatment_gene$p.stars[1], show_values = FALSE)) +
    # do.call(geom_signif, default_sig(0.65, 0.75, 1.25, model_treatment_gene$p.value[2], model_treatment_gene$p.stars[2], show_values = FALSE)) +
    # do.call(geom_signif, default_sig(0.7, 1, 1.25, model_treatment_gene$p.value[3], model_treatment_gene$p.stars[3], show_values = FALSE)) +
    # do.call(geom_signif, default_sig(0.35, 1.75, 2, model_treatment_gene$p.value[4], model_treatment_gene$p.stars[4], show_values = FALSE)) +
    # do.call(geom_signif, default_sig(0.4, 1.75, 2.25, model_treatment_gene$p.value[5], model_treatment_gene$p.stars[5], show_values = FALSE)) +
    # do.call(geom_signif, default_sig(0.45, 2, 2.25, model_treatment_gene$p.value[6], model_treatment_gene$p.stars[6], show_values = FALSE)) +
    # do.call(geom_signif, default_sig(0.2, 2.75, 3, model_treatment_gene$p.value[7], model_treatment_gene$p.stars[7], show_values = FALSE)) +
    # do.call(geom_signif, default_sig(0.25, 2.75, 3.25, model_treatment_gene$p.value[8], model_treatment_gene$p.stars[8], show_values = FALSE)) +
    # do.call(geom_signif, default_sig(0.3, 3, 3.25, model_treatment_gene$p.value[9], model_treatment_gene$p.stars[9], show_values = FALSE)) +
    # do.call(geom_signif, default_sig(0.45, 3.75, 4, model_treatment_gene$p.value[10], model_treatment_gene$p.stars[10], show_values = FALSE, hide_ns = FALSE)) +
    # do.call(geom_signif, default_sig(0.52, 3.75, 4.25, model_treatment_gene$p.value[11], model_treatment_gene$p.stars[11], show_values = FALSE, hide_ns = FALSE)) +
    # do.call(geom_signif, default_sig(0.6, 4, 4.25, model_treatment_gene$p.value[12], model_treatment_gene$p.stars[12], show_values = FALSE, hide_ns = FALSE)) +
    # do.call(geom_signif, default_sig(0.45, 4.75, 5, model_treatment_gene$p.value[13], model_treatment_gene$p.stars[13], show_values = FALSE, hide_ns = FALSE)) +
    # do.call(geom_signif, default_sig(0.52, 4.75, 5.25, model_treatment_gene$p.value[14], model_treatment_gene$p.stars[14], show_values = FALSE, hide_ns = FALSE)) +
    # do.call(geom_signif, default_sig(0.6, 5, 5.25, model_treatment_gene$p.value[15], model_treatment_gene$p.stars[15], show_values = FALSE, hide_ns = FALSE)) +
    # do.call(geom_signif, default_sig(0.25, 5.75, 6, model_treatment_gene$p.value[16], model_treatment_gene$p.stars[16], show_values = FALSE, hide_ns = FALSE)) +
    # do.call(geom_signif, default_sig(0.32, 5.75, 6.25, model_treatment_gene$p.value[17], model_treatment_gene$p.stars[17], show_values = FALSE, hide_ns = FALSE)) +
    # do.call(geom_signif, default_sig(0.4, 6, 6.25, model_treatment_gene$p.value[18], model_treatment_gene$p.stars[18], show_values = FALSE, hide_ns = FALSE)) +
    # do.call(geom_signif, default_sig(0.1, 6.75, 7, model_treatment_gene$p.value[19], model_treatment_gene$p.stars[19], show_values = FALSE)) +
    # do.call(geom_signif, default_sig(0.15, 6.75, 7.25, model_treatment_gene$p.value[20], model_treatment_gene$p.stars[20], show_values = FALSE)) +
    # do.call(geom_signif, default_sig(0.2, 7, 7.25, model_treatment_gene$p.value[21], model_treatment_gene$p.stars[21], show_values = FALSE)) +
    # do.call(geom_signif, default_sig(0.1, 7.75, 8, model_treatment_gene$p.value[22], model_treatment_gene$p.stars[22], show_values = FALSE)) +
    # do.call(geom_signif, default_sig(0.15, 7.75, 8.25, model_treatment_gene$p.value[23], model_treatment_gene$p.stars[23], show_values = FALSE)) +
    # do.call(geom_signif, default_sig(0.2, 8, 8.25, model_treatment_gene$p.value[24], model_treatment_gene$p.stars[24], show_values = FALSE)) +
    # do.call(geom_signif, default_sig(0.1, 8.75, 9, model_treatment_gene$p.value[25], model_treatment_gene$p.stars[25], show_values = FALSE)) +
    # do.call(geom_signif, default_sig(0.15, 8.75, 9.25, model_treatment_gene$p.value[26], model_treatment_gene$p.stars[26], show_values = FALSE)) +
    # do.call(geom_signif, default_sig(0.2, 9, 9.25, model_treatment_gene$p.value[27], model_treatment_gene$p.stars[27], show_values = FALSE)) #+
    #do.call(geom_signif, default_sig(0.6, 9.75, 10, model_treatment_gene$p.value[28], model_treatment_gene$p.stars[28], show_values = FALSE, hide_ns = FALSE)) +
    #do.call(geom_signif, default_sig(0.65, 9.75, 10.25, model_treatment_gene$p.value[29], model_treatment_gene$p.stars[29], show_values = FALSE, hide_ns = FALSE)) +
    #do.call(geom_signif, default_sig(0.7, 10, 10.25, model_treatment_gene$p.value[30], model_treatment_gene$p.stars[30], show_values = FALSE, hide_ns = FALSE)) 
fig1e

ggsave(paste0(figures_save_path, "Figure1E.png"), plot = fig1e, width = 10, height = 6, dpi = 300)

13 Figure 1E - Proportion of Patients with CH Mutations Continued…

model_treatment_gene_class <- map_dfr(c("DTA_Mutations", "DDR_Mutations"), function(gene_class){
    rbind(
        glm(
            formula = as.formula(paste(gene_class, "~ Trilaciclib + Age")),
            family = binomial(link = "logit"),
            data = g1_mut_table %>%
                filter(whichdraw == "PostTx") %>%
                filter(Trilaciclib != "Untreated") %>%
                mutate(Trilaciclib = factor(Trilaciclib, levels = c("Trilaciclib", "Placebo")))
        ) %>%
        sjPlot::get_model_data(type = "est") %>%
        mutate(
            GeneClass = ifelse(gene_class == "DTA_Mutations", "DTA", "DDR"))
    )
})
model_treatment_gene_class <- model_treatment_gene_class %>% filter(term != "Age")

fig1e_cont <- g1_mut_table %>%
    filter(whichdraw == "PostTx", Trilaciclib != "Untreated") %>%
    group_by(Trilaciclib) %>%
    dplyr::select(Trilaciclib, DTA_Mutations, DDR_Mutations) %>%
    summarise_if(is.numeric, sum, na.rm = TRUE) %>%
    pivot_longer(cols = -c(Trilaciclib), names_to = "Gene", values_to = "n") %>%
    left_join(
        g1_mut_table %>%
            filter(whichdraw == "PostTx", Trilaciclib != "Untreated") %>%
            group_by(Trilaciclib) %>%
            summarise(total_patients = n())
    ) %>%
    mutate(
        Proportion = n / total_patients,
        Trilaciclib = factor(Trilaciclib, levels = c("Placebo", "Trilaciclib")),
        Gene = ifelse(Gene == "DTA_Mutations", "DTA", "DDR"),
        Gene = factor(Gene, levels = c("DTA", "DDR"))
    ) %>%
    ggplot(
        aes(
        x = Gene,
        y = Proportion,
        fill = Trilaciclib
        )
    ) +
    geom_bar(color = "black", stat = "identity", position = position_dodge()) +
    labs(
        x = "",
        y = "Percent of Patients\nwith Mutated Gene\nPost-Treatment",
        fill = "Trilaciclib"
    ) +
    panel_theme_basic +
    theme(
        axis.text.x = element_text(angle = 45, hjust = 1),
        legend.position = c(0.85, 0.6),
        legend.direction = "vertical",
        #plot.margin = margin(1, 0, 1, 1, "cm")
    ) +
    scale_y_continuous(labels = scales::percent, limits = c(NA, 0.75), expand = c(0, 0)) +
    scale_fill_manual(values = treatment_colors) +
    do.call(geom_signif, default_sig(0.65, 0.75, 1.25, model_treatment_gene_class$p.value[1], model_treatment_gene_class$p.stars[1], show_values = FALSE, hide_ns = FALSE)) +
    do.call(geom_signif, default_sig(0.65, 1.75, 2.25, model_treatment_gene_class$p.value[2], model_treatment_gene_class$p.stars[2], show_values = FALSE))
## Joining with `by = join_by(Trilaciclib)`
fig1e_cont

ggsave(paste0(figures_save_path, "Figure1E_Continued.png"), plot = fig1e_cont, width = 10, height = 6, dpi = 300)

14 Figure 1F - Small Cell Lung Cancer Growth Rate Plot

df_to_plot <- g1_tp %>% 
    filter(
        case_when(
            Cohort == "Untreated" ~ compare_group == "PrevsPost",
            Cohort == "SCLC" ~ compare_group == "C1D1vsC5D1"
        )
    )

model_growth_rate_class_geeglm <- rbind(
    geeglm(
        formula = growth_rate ~ Trilaciclib + preAge,
        data = df_to_plot %>%
            filter(Gene_Class == "DDR") %>% 
            mutate(
                preAge = as.numeric(preAge)
            ) %>%
            filter(growth_rate != "Inf" & growth_rate != "-Inf"),
        id = DeID,
        corstr = "exchangeable"
    ) %>% sjPlot::get_model_data(type = "est") %>%
    mutate(
        term = case_when(
            term == "TrilaciclibTrilaciclib" ~ "UntreatedVSTrilaciclib", 
            term == "TrilaciclibPlacebo" ~ "UntreatedVSPlacebo",
            term == "preAge" ~ "Age"
        ),
        Gene_Class = "DDR"
    ),
    geeglm(
        formula = growth_rate ~ Trilaciclib + preAge,
        data = df_to_plot %>%
            filter(Gene_Class == "DDR") %>% 
            filter(Trilaciclib != "Untreated") %>%
            mutate(
                preAge = as.numeric(preAge),
                Trilaciclib = factor(Trilaciclib, levels = c("Trilaciclib", "Placebo"))
            ) %>%
            filter(growth_rate != "Inf" & growth_rate != "-Inf"),
        id = DeID,
        corstr = "exchangeable"
    ) %>% sjPlot::get_model_data(type = "est") %>%
    mutate(
        term = case_when(
            term == "TrilaciclibPlacebo" ~ "TrilaciclibVSPlacebo",
            term == "preAge" ~ "Age"
        ),
        Gene_Class = "DDR"
    ),
    geeglm(
        formula = growth_rate ~ Trilaciclib + preAge,
        data = df_to_plot %>%
            filter(Gene_Class == "DTA") %>%
            mutate(
                preAge = as.numeric(preAge)
            ) %>%
            filter(growth_rate != "Inf" & growth_rate != "-Inf"),
        id = DeID,
        corstr = "exchangeable"
    ) %>% sjPlot::get_model_data(type = "est") %>%
    mutate(
        term = case_when(
            term == "TrilaciclibTrilaciclib" ~ "UntreatedVSTrilaciclib",
            term == "TrilaciclibPlacebo" ~ "UntreatedVSPlacebo",
            term == "preAge" ~ "Age"
        ),
        Gene_Class = "DTA"
    ),
    geeglm(
        formula = growth_rate ~ Trilaciclib + preAge,
        data = df_to_plot %>%
            filter(Gene_Class == "DTA") %>%
            filter(Trilaciclib != "Untreated") %>%
            mutate(
                preAge = as.numeric(preAge),
                Trilaciclib = factor(Trilaciclib, levels = c("Trilaciclib", "Placebo"))
            ) %>%
            filter(growth_rate != "Inf" & growth_rate != "-Inf"),
        id = DeID,
        corstr = "exchangeable"
    ) %>% sjPlot::get_model_data(type = "est") %>%
    mutate(
         term = case_when(
            term == "TrilaciclibPlacebo" ~ "TrilaciclibVSPlacebo",
            term == "preAge" ~ "Age"
        ),
        Gene_Class = "DTA"
    )
)
## Warning in diff(as.numeric(id)): NAs introduced by coercion
## Warning in diff(as.numeric(id)): NAs introduced by coercion
model_growth_rate_class <- rbind(
    glm(
        formula = growth_rate ~ Trilaciclib + Age,
        data = df_to_plot %>%
            filter(Gene_Class == "DTA") %>% 
            filter(growth_rate != "Inf" & growth_rate != "-Inf")
    ) %>% sjPlot::get_model_data(type = "est") %>%
    mutate(
        term = case_when(
            term == "TrilaciclibTrilaciclib" ~ "UntreatedVSTrilaciclib", 
            term == "TrilaciclibPlacebo" ~ "UntreatedVSPlacebo",
            TRUE ~ term
        ),
        Gene_Class = "DTA"
    ),
    glm(
        formula = growth_rate ~ Trilaciclib + Age,
        data = df_to_plot %>%
            filter(Gene_Class == "DTA") %>% 
            filter(Trilaciclib != "Untreated") %>%
            mutate(Trilaciclib = factor(Trilaciclib, levels = c("Trilaciclib", "Placebo"))) %>%
            filter(growth_rate != "Inf" & growth_rate != "-Inf")
    ) %>% sjPlot::get_model_data(type = "est") %>%
    mutate(
        term = case_when(
            term == "TrilaciclibPlacebo" ~ "TrilaciclibVSPlacebo",
            TRUE ~ term
        ),
        Gene_Class = "DTA"
    ),
    glm(
        formula = growth_rate ~ Trilaciclib + Age,
        data = df_to_plot %>%
            filter(Gene_Class == "DDR") %>%
            filter(growth_rate != "Inf" & growth_rate != "-Inf")
    ) %>% sjPlot::get_model_data(type = "est") %>%
    mutate(
        term = case_when(
            term == "TrilaciclibTrilaciclib" ~ "UntreatedVSTrilaciclib",
            term == "TrilaciclibPlacebo" ~ "UntreatedVSPlacebo",
            TRUE ~ term
        ),
        Gene_Class = "DDR"
    ),
    glm(
        formula = growth_rate ~ Trilaciclib + Age,
        data = df_to_plot %>%
            filter(Gene_Class == "DDR") %>%
            filter(Trilaciclib != "Untreated") %>%
            mutate(Trilaciclib = factor(Trilaciclib, levels = c("Trilaciclib", "Placebo"))) %>%
            filter(growth_rate != "Inf" & growth_rate != "-Inf")
    ) %>% sjPlot::get_model_data(type = "est") %>%
    mutate(
         term = case_when(
            term == "TrilaciclibPlacebo" ~ "TrilaciclibVSPlacebo",
            TRUE ~ term
        ),
        Gene_Class = "DDR"
    )
)
model_growth_rate_class <- model_growth_rate_class %>% 
    filter(term == "UntreatedVSTrilaciclib" | term == "UntreatedVSPlacebo" | term == "TrilaciclibVSPlacebo")

df_to_plot %>%
    filter(Gene_Class == "DDR") %>%
    filter(Trilaciclib != "Untreated") %>%
    filter(growth_rate != "Inf" & growth_rate != "-Inf") %>%
    group_by(Trilaciclib) %>%
    summarise(n = mean(growth_rate))
Trilaciclib n
Trilaciclib 0.0100532
Placebo 0.0152614
counts <- df_to_plot %>%
    filter(growth_rate != Inf & growth_rate != -Inf) %>%
    filter(Gene_Class == "DTA" | Gene_Class == "DDR") %>%
    group_by(Trilaciclib, Gene_Class) %>%
    summarise(n = n(), .groups = "drop")

fig1f <- df_to_plot %>%
    filter(growth_rate != "Inf" & growth_rate != "-Inf") %>%
    filter(Gene_Class == "DTA" | Gene_Class == "DDR") %>%       # We are filtering the data to only include the DDR and DTA gene classes.
    ggplot(
        aes(
            x = Gene_Class,                 # Notice how we are using the Gene_Class variable for the x-axis where before we were using Trilaciclib.
            y = growth_rate, 
            fill = Trilaciclib              # We will still group data by Trilaciclib, but our main focus is on the Gene_Class variable.
        )
    ) +
    geom_hline(yintercept = 0, color = "black", alpha = 0.3) +
    geom_boxplot(outlier.shape = NA) +
    geom_point(position = position_jitterdodge(jitter.width=0.2), alpha = 0.3) +
    geom_text(data = counts, aes(label = n, y = 0.06), size = 8, position = position_dodge(width = 0.8)) +
    labs(
        y = "Growth Rate", 
        x = '', 
        title = ""
    ) +
    panel_theme_basic +
    scale_y_continuous(trans = scales::pseudo_log_trans(0.001), breaks = c(-0.05, -0.01, -0.005, 0, 0.005, 0.01, 0.05), limit = c(NA, 1.5)) +
    scale_fill_manual(values = treatment_colors) +
    do.call(geom_signif, default_sig_data(df_to_plot %>% filter(Gene_Class == "DTA"), 5, 0.75, 1, model_growth_rate_class$p.value[1], model_growth_rate_class$p.stars[1], show_values = FALSE)) +
    do.call(geom_signif, default_sig_data(df_to_plot %>% filter(Gene_Class == "DTA"), 6, 0.75, 1.25, model_growth_rate_class$p.value[2], model_growth_rate_class$p.stars[2], show_values = FALSE)) +
    do.call(geom_signif, default_sig_data(df_to_plot %>% filter(Gene_Class == "DTA"), 7, 1, 1.25, model_growth_rate_class$p.value[3], model_growth_rate_class$p.stars[3], show_values = FALSE)) +
    do.call(geom_signif, default_sig_data(df_to_plot %>% filter(Gene_Class == "DDR"), 5, 1.75, 2, model_growth_rate_class$p.value[4], model_growth_rate_class$p.stars[4], show_values = FALSE)) +
    do.call(geom_signif, default_sig_data(df_to_plot %>% filter(Gene_Class == "DDR"), 6, 1.75, 2.25, model_growth_rate_class$p.value[5], model_growth_rate_class$p.stars[5], show_values = FALSE)) +
    do.call(geom_signif, default_sig_data(df_to_plot %>% filter(Gene_Class == "DDR"), 7, 2, 2.25, model_growth_rate_class$p.value[6], model_growth_rate_class$p.stars[6], show_values = FALSE))
## Warning in (function (mapping = NULL, data = NULL, stat = "signif", position =
## "identity", : Ignoring unknown parameters: `fill`
## Warning in (function (mapping = NULL, data = NULL, stat = "signif", position =
## "identity", : Ignoring unknown parameters: `fill`
## Warning in (function (mapping = NULL, data = NULL, stat = "signif", position =
## "identity", : Ignoring unknown parameters: `fill`
## Warning in (function (mapping = NULL, data = NULL, stat = "signif", position =
## "identity", : Ignoring unknown parameters: `fill`
## Warning in (function (mapping = NULL, data = NULL, stat = "signif", position =
## "identity", : Ignoring unknown parameters: `fill`
## Warning in (function (mapping = NULL, data = NULL, stat = "signif", position =
## "identity", : Ignoring unknown parameters: `fill`
fig1f
## Warning: Removed 2 rows containing non-finite outside the scale range
## (`stat_signif()`).
## Warning: Removed 2 rows containing non-finite outside the scale range (`stat_signif()`).
## Removed 2 rows containing non-finite outside the scale range (`stat_signif()`).
## Removed 2 rows containing non-finite outside the scale range (`stat_signif()`).
## Removed 2 rows containing non-finite outside the scale range (`stat_signif()`).
## Removed 2 rows containing non-finite outside the scale range (`stat_signif()`).

ggsave(paste0(figures_save_path, "Figure1F.png"), plot = fig1f, width = 12, height = 10, dpi = 300)
## Warning: Removed 2 rows containing non-finite outside the scale range (`stat_signif()`).
## Removed 2 rows containing non-finite outside the scale range (`stat_signif()`).
## Removed 2 rows containing non-finite outside the scale range (`stat_signif()`).
## Removed 2 rows containing non-finite outside the scale range (`stat_signif()`).
## Removed 2 rows containing non-finite outside the scale range (`stat_signif()`).
## Removed 2 rows containing non-finite outside the scale range (`stat_signif()`).

15 Figure 2C - Colorectal Cancer Growth Rate Plot

df_to_plot <- g1_tp %>% 
    filter(
        case_when(
            Cohort == "Untreated" ~ compare_group == "PrevsPost",
            Cohort == "CRC" ~ compare_group == "C1D1vsMaintenance"
        )
    )

model_growth_rate_class_geeglm <- rbind(
    geeglm(
        formula = growth_rate ~ Trilaciclib,
        data = df_to_plot %>%
            filter(Gene_Class == "DDR") %>% 
            #mutate(
            #    preAge = as.numeric(preAge)
            #) %>%
            filter(growth_rate != "Inf" & growth_rate != "-Inf"),
        id = DeID,
        corstr = "exchangeable"
    ) %>% sjPlot::get_model_data(type = "est") %>%
    mutate(
        term = case_when(
            term == "TrilaciclibTrilaciclib" ~ "UntreatedVSTrilaciclib", 
            term == "TrilaciclibPlacebo" ~ "UntreatedVSPlacebo",
            term == "preAge" ~ "Age"
        ),
        Gene_Class = "DDR"
    ),
    geeglm(
        formula = growth_rate ~ Trilaciclib,
        data = df_to_plot %>%
            filter(Gene_Class == "DDR") %>% 
            filter(Trilaciclib != "Untreated") %>%
            mutate(
                #preAge = as.numeric(preAge),
                Trilaciclib = factor(Trilaciclib, levels = c("Trilaciclib", "Placebo"))
            ) %>%
            filter(growth_rate != "Inf" & growth_rate != "-Inf"),
        id = DeID,
        corstr = "exchangeable"
    ) %>% sjPlot::get_model_data(type = "est") %>%
    mutate(
        term = case_when(
            term == "TrilaciclibPlacebo" ~ "TrilaciclibVSPlacebo",
            term == "preAge" ~ "Age"
        ),
        Gene_Class = "DDR"
    ),
    geeglm(
        formula = growth_rate ~ Trilaciclib,
        data = df_to_plot %>%
            filter(Gene_Class == "DTA") %>%
            #mutate(
            #    preAge = as.numeric(preAge)
            #) %>%
            filter(growth_rate != "Inf" & growth_rate != "-Inf"),
        id = DeID,
        corstr = "exchangeable"
    ) %>% sjPlot::get_model_data(type = "est") %>%
    mutate(
        term = case_when(
            term == "TrilaciclibTrilaciclib" ~ "UntreatedVSTrilaciclib",
            term == "TrilaciclibPlacebo" ~ "UntreatedVSPlacebo",
            term == "preAge" ~ "Age"
        ),
        Gene_Class = "DTA"
    ),
    geeglm(
        formula = growth_rate ~ Trilaciclib,
        data = df_to_plot %>%
            filter(Gene_Class == "DTA") %>%
            filter(Trilaciclib != "Untreated") %>%
            mutate(
                #preAge = as.numeric(preAge),
                Trilaciclib = factor(Trilaciclib, levels = c("Trilaciclib", "Placebo"))
            ) %>%
            filter(growth_rate != "Inf" & growth_rate != "-Inf"),
        id = DeID,
        corstr = "exchangeable"
    ) %>% sjPlot::get_model_data(type = "est") %>%
    mutate(
         term = case_when(
            term == "TrilaciclibPlacebo" ~ "TrilaciclibVSPlacebo",
            term == "preAge" ~ "Age"
        ),
        Gene_Class = "DTA"
    )
)
## Warning in diff(as.numeric(id)): NAs introduced by coercion
## Warning in diff(as.numeric(id)): NAs introduced by coercion
## Warning in diff(as.numeric(id)): NAs introduced by coercion
## Warning in diff(as.numeric(id)): NAs introduced by coercion
model_growth_rate_class <- rbind(
    glm(
        formula = growth_rate ~ Trilaciclib + Age,
        data = df_to_plot %>%
            filter(Gene_Class == "DTA") %>% 
            filter(growth_rate != "Inf" & growth_rate != "-Inf")
    ) %>% sjPlot::get_model_data(type = "est") %>%
    mutate(
        term = case_when(
            term == "TrilaciclibTrilaciclib" ~ "UntreatedVSTrilaciclib", 
            term == "TrilaciclibPlacebo" ~ "UntreatedVSPlacebo",
            TRUE ~ term
        ),
        Gene_Class = "DTA"
    ),
    glm(
        formula = growth_rate ~ Trilaciclib + Age,
        data = df_to_plot %>%
            filter(Gene_Class == "DTA") %>% 
            filter(Trilaciclib != "Untreated") %>%
            mutate(Trilaciclib = factor(Trilaciclib, levels = c("Trilaciclib", "Placebo"))) %>%
            filter(growth_rate != "Inf" & growth_rate != "-Inf")
    ) %>% sjPlot::get_model_data(type = "est") %>%
    mutate(
        term = case_when(
            term == "TrilaciclibPlacebo" ~ "TrilaciclibVSPlacebo",
            TRUE ~ term
        ),
        Gene_Class = "DTA"
    ),
    glm(
        formula = growth_rate ~ Trilaciclib + Age,
        data = df_to_plot %>%
            filter(Gene_Class == "DDR") %>%
            filter(growth_rate != "Inf" & growth_rate != "-Inf")
    ) %>% sjPlot::get_model_data(type = "est") %>%
    mutate(
        term = case_when(
            term == "TrilaciclibTrilaciclib" ~ "UntreatedVSTrilaciclib",
            term == "TrilaciclibPlacebo" ~ "UntreatedVSPlacebo",
            TRUE ~ term
        ),
        Gene_Class = "DDR"
    ),
    glm(
        formula = growth_rate ~ Trilaciclib + Age,
        data = df_to_plot %>%
            filter(Gene_Class == "DDR") %>%
            filter(Trilaciclib != "Untreated") %>%
            mutate(Trilaciclib = factor(Trilaciclib, levels = c("Trilaciclib", "Placebo"))) %>%
            filter(growth_rate != "Inf" & growth_rate != "-Inf")
    ) %>% sjPlot::get_model_data(type = "est") %>%
    mutate(
         term = case_when(
            term == "TrilaciclibPlacebo" ~ "TrilaciclibVSPlacebo",
            TRUE ~ term
        ),
        Gene_Class = "DDR"
    )
)
model_growth_rate_class <- model_growth_rate_class %>% 
    filter(term == "UntreatedVSTrilaciclib" | term == "UntreatedVSPlacebo" | term == "TrilaciclibVSPlacebo")

counts <- df_to_plot %>%
    filter(growth_rate != Inf & growth_rate != -Inf) %>%
    filter(Gene_Class == "DDR" | Gene_Class == "DTA") %>%
    group_by(Trilaciclib, Gene_Class) %>%
    summarise(n = n(), .groups = "drop")

fig2c <- df_to_plot %>%
    filter(growth_rate != "Inf" & growth_rate != "-Inf") %>%
    filter(Gene_Class == "DDR" | Gene_Class == "DTA") %>%       # We are filtering the data to only include the DDR and DTA gene classes.
    ggplot(
        aes(
            x = Gene_Class,                 # Notice how we are using the Gene_Class variable for the x-axis where before we were using Trilaciclib.
            y = growth_rate, 
            fill = Trilaciclib              # We will still group data by Trilaciclib, but our main focus is on the Gene_Class variable.
        )
    ) +
    geom_hline(yintercept = 0, color = "black", alpha = 0.3) +
    geom_boxplot(outlier.shape = NA) +
    geom_point(position = position_jitterdodge(jitter.width=0.2), alpha = 0.3) +
    geom_text(data = counts, aes(label = n, y = 0.06), size = 8, position = position_dodge(width = 0.8)) +
    labs(
        y = "Growth Rate", 
        x = '', 
        title = ""
    ) +
    panel_theme_basic +
    scale_y_continuous(trans = scales::pseudo_log_trans(0.001), breaks = c(-0.05, -0.01, -0.005, 0, 0.005, 0.01, 0.05), limit = c(NA, 1.5)) +
    scale_fill_manual(values = treatment_colors) +
    do.call(geom_signif, default_sig_data(df_to_plot %>% filter(Gene_Class == "DTA"), 5, 0.75, 1, model_growth_rate_class$p.value[1], model_growth_rate_class$p.stars[1], show_values = FALSE)) +
    do.call(geom_signif, default_sig_data(df_to_plot %>% filter(Gene_Class == "DTA"), 6, 0.75, 1.25, model_growth_rate_class$p.value[2], model_growth_rate_class$p.stars[2], show_values = FALSE)) +
    do.call(geom_signif, default_sig_data(df_to_plot %>% filter(Gene_Class == "DTA"), 7, 1, 1.25, model_growth_rate_class$p.value[3], model_growth_rate_class$p.stars[3], show_values = FALSE)) +
    do.call(geom_signif, default_sig_data(df_to_plot %>% filter(Gene_Class == "DDR"), 5, 1.75, 2, model_growth_rate_class$p.value[4], model_growth_rate_class$p.stars[4], show_values = FALSE)) +
    do.call(geom_signif, default_sig_data(df_to_plot %>% filter(Gene_Class == "DDR"), 6, 1.75, 2.25, model_growth_rate_class$p.value[5], model_growth_rate_class$p.stars[5], show_values = FALSE)) +
    do.call(geom_signif, default_sig_data(df_to_plot %>% filter(Gene_Class == "DDR"), 7, 2, 2.25, model_growth_rate_class$p.value[6], model_growth_rate_class$p.stars[6], show_values = FALSE))
## Warning in (function (mapping = NULL, data = NULL, stat = "signif", position =
## "identity", : Ignoring unknown parameters: `fill`
## Warning in (function (mapping = NULL, data = NULL, stat = "signif", position =
## "identity", : Ignoring unknown parameters: `fill`
## Warning in (function (mapping = NULL, data = NULL, stat = "signif", position =
## "identity", : Ignoring unknown parameters: `fill`
## Warning in (function (mapping = NULL, data = NULL, stat = "signif", position =
## "identity", : Ignoring unknown parameters: `fill`
## Warning in (function (mapping = NULL, data = NULL, stat = "signif", position =
## "identity", : Ignoring unknown parameters: `fill`
## Warning in (function (mapping = NULL, data = NULL, stat = "signif", position =
## "identity", : Ignoring unknown parameters: `fill`
fig2c

ggsave(paste0(figures_save_path, "Figure2C.png"), plot = fig2c, width = 12, height = 8, dpi = 300)

16 Figure 2D - Triple Negative Breast Cancer Growth Rate Plot

df_to_plot <- g1_tp %>% 
    filter(
        case_when(
            Cohort == "Untreated" ~ compare_group == "PrevsPost",
            Cohort == "TNBC" ~ compare_group == "C1D1vsC7D1"
        )
    )

model_growth_rate_class_geeglm <- rbind(
    geeglm(
        formula = growth_rate ~ Trilaciclib + preAge,
        data = df_to_plot %>%
            filter(Gene_Class == "DDR") %>% 
            mutate(
                preAge = as.numeric(preAge)
            ) %>%
            filter(growth_rate != "Inf" & growth_rate != "-Inf"),
        id = DeID,
        corstr = "exchangeable"
    ) %>% sjPlot::get_model_data(type = "est") %>%
    mutate(
        term = case_when(
            term == "TrilaciclibTrilaciclib" ~ "UntreatedVSTrilaciclib", 
            term == "TrilaciclibPlacebo" ~ "UntreatedVSPlacebo",
            term == "preAge" ~ "Age"
        ),
        Gene_Class = "DDR"
    ),
    geeglm(
        formula = growth_rate ~ Trilaciclib + preAge,
        data = df_to_plot %>%
            filter(Gene_Class == "DDR") %>% 
            filter(Trilaciclib != "Untreated") %>%
            mutate(
                preAge = as.numeric(preAge),
                Trilaciclib = factor(Trilaciclib, levels = c("Trilaciclib", "Placebo"))
            ) %>%
            filter(growth_rate != "Inf" & growth_rate != "-Inf"),
        id = DeID,
        corstr = "exchangeable"
    ) %>% sjPlot::get_model_data(type = "est") %>%
    mutate(
        term = case_when(
            term == "TrilaciclibPlacebo" ~ "TrilaciclibVSPlacebo",
            term == "preAge" ~ "Age"
        ),
        Gene_Class = "DDR"
    ),
    geeglm(
        formula = growth_rate ~ Trilaciclib + preAge,
        data = df_to_plot %>%
            filter(Gene_Class == "DTA") %>%
            mutate(
                preAge = as.numeric(preAge)
            ) %>%
            filter(growth_rate != "Inf" & growth_rate != "-Inf"),
        id = DeID,
        corstr = "exchangeable"
    ) %>% sjPlot::get_model_data(type = "est") %>%
    mutate(
        term = case_when(
            term == "TrilaciclibTrilaciclib" ~ "UntreatedVSTrilaciclib",
            term == "TrilaciclibPlacebo" ~ "UntreatedVSPlacebo",
            term == "preAge" ~ "Age"
        ),
        Gene_Class = "DTA"
    ),
    geeglm(
        formula = growth_rate ~ Trilaciclib + preAge,
        data = df_to_plot %>%
            filter(Gene_Class == "DTA") %>%
            filter(Trilaciclib != "Untreated") %>%
            mutate(
                preAge = as.numeric(preAge),
                Trilaciclib = factor(Trilaciclib, levels = c("Trilaciclib", "Placebo"))
            ) %>%
            filter(growth_rate != "Inf" & growth_rate != "-Inf"),
        id = DeID,
        corstr = "exchangeable"
    ) %>% sjPlot::get_model_data(type = "est") %>%
    mutate(
         term = case_when(
            term == "TrilaciclibPlacebo" ~ "TrilaciclibVSPlacebo",
            term == "preAge" ~ "Age"
        ),
        Gene_Class = "DTA"
    )
)
## Warning in diff(as.numeric(id)): NAs introduced by coercion
## Warning in diff(as.numeric(id)): NAs introduced by coercion
model_growth_rate_class <- rbind(
    glm(
        formula = growth_rate ~ Trilaciclib + Age,
        data = df_to_plot %>%
            filter(Gene_Class == "DTA") %>% 
            filter(growth_rate != "Inf" & growth_rate != "-Inf")
    ) %>% sjPlot::get_model_data(type = "est") %>%
    mutate(
        term = case_when(
            term == "TrilaciclibTrilaciclib" ~ "UntreatedVSTrilaciclib", 
            term == "TrilaciclibPlacebo" ~ "UntreatedVSPlacebo",
            TRUE ~ term
        ),
        Gene_Class = "DTA"
    ),
    glm(
        formula = growth_rate ~ Trilaciclib + Age,
        data = df_to_plot %>%
            filter(Gene_Class == "DTA") %>% 
            filter(Trilaciclib != "Untreated") %>%
            mutate(Trilaciclib = factor(Trilaciclib, levels = c("Trilaciclib", "Placebo"))) %>%
            filter(growth_rate != "Inf" & growth_rate != "-Inf")
    ) %>% sjPlot::get_model_data(type = "est") %>%
    mutate(
        term = case_when(
            term == "TrilaciclibPlacebo" ~ "TrilaciclibVSPlacebo",
            TRUE ~ term
        ),
        Gene_Class = "DTA"
    ),
    glm(
        formula = growth_rate ~ Trilaciclib + Age,
        data = df_to_plot %>%
            filter(Gene_Class == "DDR") %>%
            filter(growth_rate != "Inf" & growth_rate != "-Inf")
    ) %>% sjPlot::get_model_data(type = "est") %>%
    mutate(
        term = case_when(
            term == "TrilaciclibTrilaciclib" ~ "UntreatedVSTrilaciclib",
            term == "TrilaciclibPlacebo" ~ "UntreatedVSPlacebo",
            TRUE ~ term
        ),
        Gene_Class = "DDR"
    ),
    glm(
        formula = growth_rate ~ Trilaciclib + Age,
        data = df_to_plot %>%
            filter(Gene_Class == "DDR") %>%
            filter(Trilaciclib != "Untreated") %>%
            mutate(Trilaciclib = factor(Trilaciclib, levels = c("Trilaciclib", "Placebo"))) %>%
            filter(growth_rate != "Inf" & growth_rate != "-Inf")
    ) %>% sjPlot::get_model_data(type = "est") %>%
    mutate(
         term = case_when(
            term == "TrilaciclibPlacebo" ~ "TrilaciclibVSPlacebo",
            TRUE ~ term
        ),
        Gene_Class = "DDR"
    )
)
model_growth_rate_class <- model_growth_rate_class %>% 
    filter(term == "UntreatedVSTrilaciclib" | term == "UntreatedVSPlacebo" | term == "TrilaciclibVSPlacebo")

counts <- df_to_plot %>%
    filter(growth_rate != Inf & growth_rate != -Inf) %>%
    filter(Gene_Class == "DDR" | Gene_Class == "DTA") %>%
    group_by(Trilaciclib, Gene_Class) %>%
    summarise(n = n(), .groups = "drop")

fig2d <- df_to_plot %>%
    filter(growth_rate != "Inf" & growth_rate != "-Inf") %>%
    filter(Gene_Class == "DDR" | Gene_Class == "DTA") %>%       # We are filtering the data to only include the DDR and DTA gene classes.
    ggplot(
        aes(
            x = Gene_Class,                 # Notice how we are using the Gene_Class variable for the x-axis where before we were using Trilaciclib.
            y = growth_rate, 
            fill = Trilaciclib              # We will still group data by Trilaciclib, but our main focus is on the Gene_Class variable.
        )
    ) +
    geom_hline(yintercept = 0, color = "black", alpha = 0.3) +
    geom_boxplot(outlier.shape = NA) +
    geom_point(position = position_jitterdodge(jitter.width=0.2), alpha = 0.3) +
    geom_text(data = counts, aes(label = n, y = 0.06), size = 8, position = position_dodge(width = 0.8)) +
    labs(
        y = "Growth Rate", 
        x = '', 
        title = ""
    ) +
    panel_theme_basic +
    scale_y_continuous(trans = scales::pseudo_log_trans(0.001), breaks = c(-0.05, -0.01, -0.005, 0, 0.005, 0.01, 0.05), limit = c(NA, 1.5)) +
    scale_fill_manual(values = treatment_colors) +
    do.call(geom_signif, default_sig_data(df_to_plot %>% filter(Gene_Class == "DTA"), 5, 0.75, 1, model_growth_rate_class$p.value[1], model_growth_rate_class$p.stars[1], show_values = FALSE)) +
    do.call(geom_signif, default_sig_data(df_to_plot %>% filter(Gene_Class == "DTA"), 6, 0.75, 1.25, model_growth_rate_class$p.value[2], model_growth_rate_class$p.stars[2], show_values = FALSE)) +
    do.call(geom_signif, default_sig_data(df_to_plot %>% filter(Gene_Class == "DTA"), 7, 1, 1.25, model_growth_rate_class$p.value[3], model_growth_rate_class$p.stars[3], show_values = FALSE)) +
    do.call(geom_signif, default_sig_data(df_to_plot %>% filter(Gene_Class == "DDR"), 5, 1.75, 2, model_growth_rate_class$p.value[4], model_growth_rate_class$p.stars[4], show_values = FALSE)) +
    do.call(geom_signif, default_sig_data(df_to_plot %>% filter(Gene_Class == "DDR"), 6, 1.75, 2.25, model_growth_rate_class$p.value[5], model_growth_rate_class$p.stars[5], show_values = FALSE)) +
    do.call(geom_signif, default_sig_data(df_to_plot %>% filter(Gene_Class == "DDR"), 7, 2, 2.25, model_growth_rate_class$p.value[6], model_growth_rate_class$p.stars[6], show_values = FALSE))
## Warning in (function (mapping = NULL, data = NULL, stat = "signif", position =
## "identity", : Ignoring unknown parameters: `fill`
## Warning in (function (mapping = NULL, data = NULL, stat = "signif", position =
## "identity", : Ignoring unknown parameters: `fill`
## Warning in (function (mapping = NULL, data = NULL, stat = "signif", position =
## "identity", : Ignoring unknown parameters: `fill`
## Warning in (function (mapping = NULL, data = NULL, stat = "signif", position =
## "identity", : Ignoring unknown parameters: `fill`
## Warning in (function (mapping = NULL, data = NULL, stat = "signif", position =
## "identity", : Ignoring unknown parameters: `fill`
## Warning in (function (mapping = NULL, data = NULL, stat = "signif", position =
## "identity", : Ignoring unknown parameters: `fill`
fig2d

ggsave(paste0(figures_save_path, "Figure2D.png"), plot = fig2d, width = 12, height = 8, dpi = 300)

17 Figure 2E - Genes

df_to_plot <- g1_tp %>% 
    filter(
        case_when(
            Cohort == "Untreated" ~ compare_group == "PrevsPost",
            Cohort == "SCLC" ~ compare_group == "C1D1vsC5D1",
            Cohort == "CRC" ~ compare_group == "C1D1vsMaintenance",
            Cohort == "TNBC" ~ compare_group == "C1D1vsC7D1",
            TRUE ~ TRUE
        )
    ) %>%
    mutate(Variant_Classification = if_else(VariantClass == "missense_variant", "Missense", "Nonsense"))

model_growth_rate_gene <- map_dfr(c("TP53", "PPM1D", "CHEK2"), function(gene) {
    print(gene)
    model_growth_rate_gene <- rbind(
        glm(
            formula = growth_rate ~ Trilaciclib + Age,
            data = df_to_plot %>%
                filter(Gene == gene) %>%
                filter(growth_rate != "Inf" & growth_rate != "-Inf")
        ) %>% sjPlot::get_model_data(type = "est") %>%
        mutate(
            term = case_when(
                term == "TrilaciclibTrilaciclib" ~ "UntreatedVSTrilaciclib", 
                term == "TrilaciclibPlacebo" ~ "UntreatedVSPlacebo",
                TRUE ~ term
            ),
            gene = gene
        ),
        glm(
            formula = growth_rate ~ Trilaciclib + Cohort + Age,
            data = df_to_plot %>%
                filter(Gene == gene) %>%
                filter(Trilaciclib != "Untreated") %>%
                filter(growth_rate != "Inf" & growth_rate != "-Inf")
        ) %>% sjPlot::get_model_data(type = "est") %>%
        filter(!grepl("Cohort", term)) %>%
        mutate(
            term = case_when(
                term == "TrilaciclibPlacebo" ~ "TrilaciclibVSPlacebo",
                TRUE ~ term
            ),
            gene = gene
        )
    )
    model_growth_rate_gene
})
## [1] "TP53"
## [1] "PPM1D"
## [1] "CHEK2"
model_growth_rate_gene
term estimate std.error conf.level conf.low conf.high statistic df.error p.value p.stars p.label group xpos xmin xmax gene
UntreatedVSTrilaciclib 0.0033874 0.0019547 0.95 -0.0004438 0.0072186 1.7329229 138 0.0831094 0.00 pos 3 2.825 3.175 TP53
UntreatedVSPlacebo 0.0059025 0.0019497 0.95 0.0020812 0.0097238 3.0274252 138 0.0024665 ** 0.01 ** pos 2 1.825 2.175 TP53
Age 0.0001009 0.0000446 0.95 0.0000136 0.0001883 2.2640330 138 0.0235721 * 0.00 * pos 1 0.825 1.175 TP53
TrilaciclibVSPlacebo 0.0026568 0.0012011 0.95 0.0003027 0.0050108 2.2119820 123 0.0269679 * 0.00 * pos 4 3.825 4.175 TP53
Age 0.0001056 0.0000509 0.95 0.0000059 0.0002053 2.0755326 123 0.0379372 * 0.00 * pos 1 0.825 1.175 TP53
UntreatedVSTrilaciclib 0.0091865 0.0033285 0.95 0.0026628 0.0157102 2.7599497 171 0.0057810 ** 0.01 ** pos 3 2.825 3.175 PPM1D
UntreatedVSPlacebo 0.0149956 0.0033377 0.95 0.0084538 0.0215374 4.4927820 171 0.0000070 *** 0.01 *** pos 2 1.825 2.175 PPM1D
Age -0.0000390 0.0000578 0.95 -0.0001522 0.0000743 -0.6747297 171 0.4998475 -0.00 neg 1 0.825 1.175 PPM1D
TrilaciclibVSPlacebo 0.0057269 0.0012940 0.95 0.0031908 0.0082631 4.4258505 163 0.0000096 *** 0.01 *** pos 4 3.825 4.175 PPM1D
Age -0.0000745 0.0000632 0.95 -0.0001983 0.0000493 -1.1790850 163 0.2383643 -0.00 neg 1 0.825 1.175 PPM1D
UntreatedVSTrilaciclib 0.0050014 0.0031218 0.95 -0.0011173 0.0111201 1.6020752 67 0.1091390 0.01 pos 3 2.825 3.175 CHEK2
UntreatedVSPlacebo 0.0079642 0.0029400 0.95 0.0022018 0.0137265 2.7088733 67 0.0067512 ** 0.01 ** pos 2 1.825 2.175 CHEK2
Age 0.0000618 0.0000770 0.95 -0.0000891 0.0002128 0.8030364 67 0.4219537 0.00 pos 1 0.825 1.175 CHEK2
TrilaciclibVSPlacebo 0.0029743 0.0018407 0.95 -0.0006334 0.0065820 1.6158468 59 0.1061274 0.00 pos 4 3.825 4.175 CHEK2
Age 0.0000590 0.0000897 0.95 -0.0001168 0.0002348 0.6576834 59 0.5107416 0.00 pos 1 0.825 1.175 CHEK2
model_growth_rate_variant_class <- map_dfr(c("Missense", "Nonsense"), function(classification) {    
    print(classification)
    model_growth_rate_variant_class <- rbind(
        glm(
            formula = growth_rate ~ Trilaciclib,
            data = df_to_plot %>%
                filter(Gene == "TP53") %>%
                filter(Variant_Classification == classification) %>%
                filter(growth_rate != "Inf" & growth_rate != "-Inf")
        ) %>% sjPlot::get_model_data(type = "est") %>%
        mutate(
            term = ifelse(term == "TrilaciclibTrilaciclib", "UntreatedVSTrilaciclib", "UntreatedVSPlacebo"),
            classification = classification
        ),
        glm(
            formula = growth_rate ~ Trilaciclib + Cohort,
            data = df_to_plot %>%
                filter(Gene == "TP53") %>%
                filter(Variant_Classification == classification) %>%
                filter(Trilaciclib != "Untreated") %>%
                filter(growth_rate != "Inf" & growth_rate != "-Inf")
        ) %>% sjPlot::get_model_data(type = "est") %>%
        filter(!grepl("Cohort", term)) %>%
        mutate(
            term = "TrilaciclibVSPlacebo",
            classification = classification
        )
    )
    model_growth_rate_variant_class
})
## [1] "Missense"
## [1] "Nonsense"
model_growth_rate_variant_class
term estimate std.error conf.level conf.low conf.high statistic df.error p.value p.stars p.label group xpos xmin xmax classification
UntreatedVSTrilaciclib 0.0033104 0.0019768 0.95 -0.0005639 0.0071848 1.6746765 117 0.0939977 0.00 pos 2 1.825 2.175 Missense
UntreatedVSPlacebo 0.0059889 0.0019876 0.95 0.0020932 0.0098846 3.0130927 117 0.0025860 ** 0.01 ** pos 1 0.825 1.175 Missense
TrilaciclibVSPlacebo 0.0025881 0.0013431 0.95 -0.0000442 0.0052204 1.9270258 103 0.0539764 0.00 pos 3 2.825 3.175 Missense
UntreatedVSTrilaciclib -0.0112287 0.0071922 0.95 -0.0253252 0.0028679 -1.5612160 19 0.1184728 -0.01 neg 2 1.825 2.175 Nonsense
UntreatedVSPlacebo -0.0092531 0.0071018 0.95 -0.0231723 0.0046661 -1.3029301 19 0.1925986 -0.01 neg 1 0.825 1.175 Nonsense
TrilaciclibVSPlacebo 0.0018658 0.0033842 0.95 -0.0047671 0.0084988 0.5513315 17 0.5814065 0.00 pos 3 2.825 3.175 Nonsense
model_growth_rate_gene_cohort <- map_dfr(c("TP53", "PPM1D", "CHEK2"), function(gene) {
    map_dfr(c("SCLC", "TNBC", "CRC"), function(cancer) {
        print(paste(gene, cancer))
        rbind(
            glm(
                formula = growth_rate ~ Trilaciclib,
                data = df_to_plot %>%
                    filter(Gene == gene & Cohort == cancer) %>%
                    filter(growth_rate != "Inf" & growth_rate != "-Inf")
            ) %>%
                sjPlot::get_model_data(type = "est") %>%
                mutate(
                    term = ifelse(term == "TrilaciclibTrilaciclib",
                        "UntreatedVSTrilaciclib",
                        "UntreatedVSPlacebo"
                    )
                ),
            glm(
                formula = growth_rate ~ Trilaciclib,
                data = df_to_plot %>%
                    filter(Gene == gene & Cohort == cancer) %>%
                    filter(Trilaciclib != "Untreated") %>%
                    filter(growth_rate != "Inf" & growth_rate != "-Inf")
            ) %>%
                sjPlot::get_model_data(type = "est") %>%
                mutate(
                    term = "TrilaciclibVSPlacebo"
                )
        ) %>%
            mutate(
                Gene = gene,
                Cohort = cancer
            )
    })
})
## [1] "TP53 SCLC"
## [1] "TP53 TNBC"
## [1] "TP53 CRC"
## [1] "PPM1D SCLC"
## [1] "PPM1D TNBC"
## [1] "PPM1D CRC"
## [1] "CHEK2 SCLC"
## [1] "CHEK2 TNBC"
## [1] "CHEK2 CRC"
df_to_plot %>% filter(growth_rate != "Inf" & growth_rate != "-Inf") %>% dplyr::select(Gene, Trilaciclib) %>% table()
##         Trilaciclib
## Gene     Untreated Trilaciclib Placebo
##   TP53          14          64      64
##   PPM1D          7          85      83
##   CHEK2          7          29      35
##   DNMT3A        81         202     222
##   TET2          42          58      99
##   ASXL1         10          16      24
##   SF3B1          1           5       9
##   SRSF2          2           1       5
##   JAK2           0           3       4
counts_genes <- df_to_plot %>%
    filter(growth_rate != Inf & growth_rate != -Inf) %>%
    filter(Gene_Class == "DDR") %>%
    group_by(Trilaciclib, Gene) %>%
    summarise(n = n(), .groups = "drop")

counts_variant_class <- df_to_plot %>%
    filter(growth_rate != Inf & growth_rate != -Inf) %>%
    filter(Gene == "TP53") %>%
    group_by(Trilaciclib, Variant_Classification) %>%
    summarise(n = n(), .groups = "drop")

fig2e <- df_to_plot %>%
    filter(growth_rate != "Inf" & growth_rate != "-Inf") %>%
    filter(Gene_Class == "DDR" ) %>%
    ggplot(
        aes(
            x = Gene,
            y = growth_rate, 
            fill = Trilaciclib
        )
    ) +
    geom_hline(yintercept = 0, color = "black", alpha = 0.3) +
    geom_boxplot(outlier.shape = NA) +
    geom_point(position = position_jitterdodge(jitter.width=0.2), alpha = 0.3) +
    geom_text(data = counts_genes, aes(label = n, y = 0.06), size = 8, position = position_dodge(width = 0.8)) +
    labs(
        y = "Growth Rate", 
        x = '', 
        title = ""
    ) +
    panel_theme_basic +
    scale_y_continuous(trans = scales::pseudo_log_trans(0.001), breaks = c(-0.05, -0.01, -0.005, 0, 0.005, 0.01, 0.05), limit = c(NA, 1.5)) +
    scale_fill_manual(values = treatment_colors) +
    do.call(geom_signif, default_sig_data(df_to_plot %>% filter(Gene == "TP53"), 5, 0.75, 1, model_growth_rate_gene$p.value[1], model_growth_rate_gene$p.stars[1], show_values = FALSE, hide_ns = FALSE)) +
    do.call(geom_signif, default_sig_data(df_to_plot %>% filter(Gene == "TP53"), 6, 0.75, 1.25, model_growth_rate_gene$p.value[2], model_growth_rate_gene$p.stars[2], show_values = FALSE, hide_ns = FALSE)) +
    do.call(geom_signif, default_sig_data(df_to_plot %>% filter(Gene == "TP53"), 7, 1, 1.25, model_growth_rate_gene$p.value[3], model_growth_rate_gene$p.stars[3], show_values = FALSE, hide_ns = FALSE)) +
    do.call(geom_signif, default_sig_data(df_to_plot %>% filter(Gene == "PPM1D"), 5, 1.75, 2, model_growth_rate_gene$p.value[4], model_growth_rate_gene$p.stars[4], show_values = FALSE, hide_ns = FALSE)) + 
    do.call(geom_signif, default_sig_data(df_to_plot %>% filter(Gene == "PPM1D"), 6, 1.75, 2.25, model_growth_rate_gene$p.value[5], model_growth_rate_gene$p.stars[5], show_values = FALSE, hide_ns = FALSE)) +
    do.call(geom_signif, default_sig_data(df_to_plot %>% filter(Gene == "PPM1D"), 7.1, 2, 2.25, model_growth_rate_gene$p.value[6], model_growth_rate_gene$p.stars[6], show_values = FALSE, hide_ns = FALSE)) +
    do.call(geom_signif, default_sig_data(df_to_plot %>% filter(Gene == "CHEK2"), 5, 2.75, 3, model_growth_rate_gene$p.value[7], model_growth_rate_gene$p.stars[7], show_values = FALSE, hide_ns = FALSE)) +
    do.call(geom_signif, default_sig_data(df_to_plot %>% filter(Gene == "CHEK2"), 6, 2.75, 3.25, model_growth_rate_gene$p.value[8], model_growth_rate_gene$p.stars[8], show_values = FALSE, hide_ns = FALSE)) +
    do.call(geom_signif, default_sig_data(df_to_plot %>% filter(Gene == "CHEK2"), 7.1, 3, 3.25, model_growth_rate_gene$p.value[9], model_growth_rate_gene$p.stars[9], show_values = FALSE, hide_ns = FALSE)) 
## Warning in (function (mapping = NULL, data = NULL, stat = "signif", position =
## "identity", : Ignoring unknown parameters: `fill`
## Warning in (function (mapping = NULL, data = NULL, stat = "signif", position =
## "identity", : Ignoring unknown parameters: `fill`
## Warning in (function (mapping = NULL, data = NULL, stat = "signif", position =
## "identity", : Ignoring unknown parameters: `fill`
## Warning in (function (mapping = NULL, data = NULL, stat = "signif", position =
## "identity", : Ignoring unknown parameters: `fill`
## Warning in (function (mapping = NULL, data = NULL, stat = "signif", position =
## "identity", : Ignoring unknown parameters: `fill`
## Warning in (function (mapping = NULL, data = NULL, stat = "signif", position =
## "identity", : Ignoring unknown parameters: `fill`
## Warning in (function (mapping = NULL, data = NULL, stat = "signif", position =
## "identity", : Ignoring unknown parameters: `fill`
## Warning in (function (mapping = NULL, data = NULL, stat = "signif", position =
## "identity", : Ignoring unknown parameters: `fill`
## Warning in (function (mapping = NULL, data = NULL, stat = "signif", position =
## "identity", : Ignoring unknown parameters: `fill`
    # do.call(geom_signif, default_sig_data(df_to_plot %>% filter(Gene == "DNMT3A"), 5, 3.75, 4, model_growth_rate_gene$p.value[10], model_growth_rate_gene$p.stars[10], show_values = FALSE, hide_ns = FALSE)) +
    # do.call(geom_signif, default_sig_data(df_to_plot %>% filter(Gene == "DNMT3A"), 6, 3.75, 4.25, model_growth_rate_gene$p.value[11], model_growth_rate_gene$p.stars[11], show_values = FALSE, hide_ns = FALSE)) +
    # do.call(geom_signif, default_sig_data(df_to_plot %>% filter(Gene == "DNMT3A"), 7, 4, 4.25, model_growth_rate_gene$p.value[12], model_growth_rate_gene$p.stars[12], show_values = FALSE, hide_ns = FALSE)) +
    # do.call(geom_signif, default_sig_data(df_to_plot %>% filter(Gene == "TET2"), 5, 4.75, 5, model_growth_rate_gene$p.value[13], model_growth_rate_gene$p.stars[13], show_values = FALSE, hide_ns = FALSE)) +
    # do.call(geom_signif, default_sig_data(df_to_plot %>% filter(Gene == "TET2"), 6, 4.75, 5.25, model_growth_rate_gene$p.value[14], model_growth_rate_gene$p.stars[14], show_values = FALSE, hide_ns = FALSE)) +
    # do.call(geom_signif, default_sig_data(df_to_plot %>% filter(Gene == "TET2"), 7, 5, 5.25, model_growth_rate_gene$p.value[15], model_growth_rate_gene$p.stars[15], show_values = FALSE, hide_ns = FALSE)) +
    # do.call(geom_signif, default_sig_data(df_to_plot %>% filter(Gene == "ASXL1"), 5, 5.75, 6, model_growth_rate_gene$p.value[16], model_growth_rate_gene$p.stars[16], show_values = FALSE, hide_ns = FALSE)) +
    # do.call(geom_signif, default_sig_data(df_to_plot %>% filter(Gene == "ASXL1"), 6, 5.75, 6.25, model_growth_rate_gene$p.value[17], model_growth_rate_gene$p.stars[17], show_values = FALSE, hide_ns = FALSE)) +
    # do.call(geom_signif, default_sig_data(df_to_plot %>% filter(Gene == "ASXL1"), 7, 6, 6.25, model_growth_rate_gene$p.value[18], model_growth_rate_gene$p.stars[18], show_values = FALSE, hide_ns = FALSE))

ext_fig_tp53_missense <- df_to_plot %>%
    filter(growth_rate != "Inf" & growth_rate != "-Inf") %>%
    filter(Gene == "TP53") %>%
    ggplot(
        aes(
            x = Variant_Classification,
            y = growth_rate, 
            fill = Trilaciclib
        )
    ) +
    geom_hline(yintercept = 0, color = "black", alpha = 0.3) +
    geom_boxplot(outlier.shape = NA) +
    geom_point(position = position_jitterdodge(jitter.width=0.2), alpha = 0.3) +
    geom_text(data = counts_variant_class, aes(label = n, y = 0.06), size = 8, position = position_dodge(width = 0.8)) +
    labs(
        y = "Growth Rate", 
        x = '', 
        title = "TP53"
    ) +
    panel_theme_basic +
    scale_y_continuous(trans = scales::pseudo_log_trans(0.001), breaks = c(-0.05, -0.01, -0.005, 0, 0.005, 0.01, 0.05), limit = c(NA, 1.5)) +
    scale_fill_manual(values = treatment_colors) +
    do.call(geom_signif, default_sig_data(df_to_plot %>% filter(Gene == "TP53"), 5, 0.75, 1, model_growth_rate_variant_class$p.value[1], model_growth_rate_variant_class$p.stars[1], show_values = FALSE, hide_ns = FALSE)) +
    do.call(geom_signif, default_sig_data(df_to_plot %>% filter(Gene == "TP53"), 6, 0.75, 1.25, model_growth_rate_variant_class$p.value[2], model_growth_rate_variant_class$p.stars[2], show_values = FALSE, hide_ns = FALSE)) +
    do.call(geom_signif, default_sig_data(df_to_plot %>% filter(Gene == "TP53"), 7, 1, 1.25, model_growth_rate_variant_class$p.value[3], model_growth_rate_variant_class$p.stars[3], show_values = FALSE, hide_ns = FALSE)) +
    do.call(geom_signif, default_sig_data(df_to_plot %>% filter(Gene == "TP53"), 5, 1.75, 2, model_growth_rate_variant_class$p.value[4], model_growth_rate_variant_class$p.stars[4], show_values = FALSE, hide_ns = FALSE)) +
    do.call(geom_signif, default_sig_data(df_to_plot %>% filter(Gene == "TP53"), 6, 1.75, 2.25, model_growth_rate_variant_class$p.value[5], model_growth_rate_variant_class$p.stars[5], show_values = FALSE, hide_ns = FALSE)) +
    do.call(geom_signif, default_sig_data(df_to_plot %>% filter(Gene == "TP53"), 7, 2, 2.25, model_growth_rate_variant_class$p.value[6], model_growth_rate_variant_class$p.stars[6], show_values = FALSE, hide_ns = FALSE))
## Warning in (function (mapping = NULL, data = NULL, stat = "signif", position =
## "identity", : Ignoring unknown parameters: `fill`
## Warning in (function (mapping = NULL, data = NULL, stat = "signif", position =
## "identity", : Ignoring unknown parameters: `fill`
## Warning in (function (mapping = NULL, data = NULL, stat = "signif", position =
## "identity", : Ignoring unknown parameters: `fill`
## Warning in (function (mapping = NULL, data = NULL, stat = "signif", position =
## "identity", : Ignoring unknown parameters: `fill`
## Warning in (function (mapping = NULL, data = NULL, stat = "signif", position =
## "identity", : Ignoring unknown parameters: `fill`
## Warning in (function (mapping = NULL, data = NULL, stat = "signif", position =
## "identity", : Ignoring unknown parameters: `fill`
ext_fig_tp53_nonsense <- df_to_plot %>%
    filter(growth_rate != "Inf" & growth_rate != "-Inf") %>%
    filter(Gene == "TP53", Variant_Classification == "Nonsense") %>%
    ggplot(
        aes(
            x = "Nonsense",
            y = growth_rate, 
            fill = Trilaciclib
        )
    ) +
    geom_hline(yintercept = 0, color = "black", alpha = 0.3) +
    geom_boxplot(outlier.shape = NA) +
    geom_point(position = position_jitterdodge(jitter.width=0.2), alpha = 0.3) +
    geom_text(data = counts_variant_class %>% filter(Variant_Classification == "Nonsense"), aes(label = n, y = 0.06), size = 8, position = position_dodge(width = 0.8)) +
    labs(
        y = "Growth Rate", 
        x = '', 
        title = "TP53"
    ) +
    panel_theme_basic +
    scale_y_continuous(trans = scales::pseudo_log_trans(0.001), breaks = c(-0.05, -0.01, -0.005, 0, 0.005, 0.01, 0.05), limit = c(NA, 1.5)) +
    scale_fill_manual(values = treatment_colors) +
    do.call(geom_signif, default_sig_data(df_to_plot %>% filter(Gene == "TP53"), 5, 0.75, 1, model_growth_rate_variant_class$p.value[4], model_growth_rate_variant_class$p.stars[4], show_values = FALSE, hide_ns = FALSE)) +
    do.call(geom_signif, default_sig_data(df_to_plot %>% filter(Gene == "TP53"), 6, 0.75, 1.25, model_growth_rate_variant_class$p.value[5], model_growth_rate_variant_class$p.stars[5], show_values = FALSE, hide_ns = FALSE)) +
    do.call(geom_signif, default_sig_data(df_to_plot %>% filter(Gene == "TP53"), 7, 1, 1.25, model_growth_rate_variant_class$p.value[6], model_growth_rate_variant_class$p.stars[6], show_values = FALSE, hide_ns = FALSE))
## Warning in (function (mapping = NULL, data = NULL, stat = "signif", position =
## "identity", : Ignoring unknown parameters: `fill`
## Warning in (function (mapping = NULL, data = NULL, stat = "signif", position =
## "identity", : Ignoring unknown parameters: `fill`
## Warning in (function (mapping = NULL, data = NULL, stat = "signif", position =
## "identity", : Ignoring unknown parameters: `fill`
interaction_colors <- c(
    "Untreated.Untreated" = "#BC3C29FF", 
    
    "Placebo.SCLC" = "#F4BC8C", 
    "Placebo.TNBC" = "#E18727FF",
    "Placebo.CRC" = "#A66421FF",
    
    "Trilaciclib.SCLC" = "#7ABD91",
    "Trilaciclib.TNBC" = "#20854EFF",
    "Trilaciclib.CRC" = "#166339FF"
)

df_to_plot %>%
    filter(growth_rate != "Inf" & growth_rate != "-Inf") %>%
    filter(Gene_Class == "DDR" ) %>%
    mutate(group = interaction(Trilaciclib, Cohort)) %>%
    ggplot(
        aes(
            x = Gene,
            y = growth_rate, 
            fill = group
        )
    ) +
    geom_hline(yintercept = 0, color = "black", alpha = 0.3) +
    geom_boxplot(outlier.shape = NA) +
    geom_point(position = position_jitterdodge(jitter.width=0.2), alpha = 0.3) +
    #geom_text(data = counts_genes, aes(label = n, y = 0.06), size = 8, position = position_dodge(width = 0.8)) +
    labs(
        y = "Growth Rate", 
        x = '', 
        title = ""
    ) +
    panel_theme_basic +
    scale_y_continuous(trans = scales::pseudo_log_trans(0.001), breaks = c(-0.05, -0.01, -0.005, 0, 0.005, 0.01, 0.05), limit = c(NA, 1.5)) +
    scale_fill_manual(values = interaction_colors)

    # do.call(geom_signif, default_sig_data(df_to_plot %>% filter(Gene == "TP53", Cohort == "SCLC"), 5, 0.75, 1, model_growth_rate_gene_cohort$p.value[1], model_growth_rate_gene_cohort$p.stars[1], show_values = FALSE, hide_ns = FALSE))
    # do.call(geom_signif, default_sig_data(df_to_plot %>% filter(Gene == "TP53", Cohort == "SCLC"), 6, 0.75, 1.25, model_growth_rate_gene_cohort$p.value[2], model_growth_rate_gene_cohort$p.stars[2], show_values = FALSE, hide_ns = FALSE)) +
    # do.call(geom_signif, default_sig_data(df_to_plot %>% filter(Gene == "TP53", Cohort == "TNBC"), 7, 1, 1.25, model_growth_rate_gene_cohort$p.value[3], model_growth_rate_gene_cohort$p.stars[3], show_values = FALSE, hide_ns = FALSE)) +
    # do.call(geom_signif, default_sig_data(df_to_plot %>% filter(Gene == "TP53", Cohort == "TNBC"), 5, 1.75, 2, model_growth_rate_gene_cohort$p.value[4], model_growth_rate_gene_cohort$p.stars[4], show_values = FALSE, hide_ns = FALSE)) + 
    # do.call(geom_signif, default_sig_data(df_to_plot %>% filter(Gene == "TP53", Cohort == "CRC"), 6, 1.75, 2.25, model_growth_rate_gene_cohort$p.value[5], model_growth_rate_gene_cohort$p.stars[5], show_values = FALSE, hide_ns = FALSE)) +
    # do.call(geom_signif, default_sig_data(df_to_plot %>% filter(Gene == "TP53", Cohort == "CRC"), 7.1, 2, 2.25, model_growth_rate_gene_cohort$p.value[6], model_growth_rate_gene_cohort$p.stars[6], show_values = FALSE, hide_ns = FALSE)) +
    # do.call(geom_signif, default_sig_data(df_to_plot %>% filter(Gene == "PPM1D", Cohort == "SCLC"), 5, 2.75, 3, model_growth_rate_gene_cohort$p.value[7], model_growth_rate_gene_cohort$p.stars[7], show_values = FALSE, hide_ns = FALSE)) +
    # do.call(geom_signif, default_sig_data(df_to_plot %>% filter(Gene == "PPM1D", Cohort == "SCLC"), 6, 2.75, 3.25, model_growth_rate_gene_cohort$p.value[8], model_growth_rate_gene_cohort$p.stars[8], show_values = FALSE, hide_ns = FALSE)) +
    # do.call(geom_signif, default_sig_data(df_to_plot %>% filter(Gene == "PPM1D", Cohort == "TNBC"), 7.1, 3, 3.25, model_growth_rate_gene_cohort$p.value[9], model_growth_rate_gene_cohort$p.stars[9], show_values = FALSE, hide_ns = FALSE)) +
    # do.call(geom_signif, default_sig_data(df_to_plot %>% filter(Gene == "PPM1D", Cohort == "TNBC"), 7.1, 3, 3.25, model_growth_rate_gene_cohort$p.value[10], model_growth_rate_gene_cohort$p.stars[10], show_values = FALSE, hide_ns = FALSE)) +
    # do.call(geom_signif, default_sig_data(df_to_plot %>% filter(Gene == "PPM1D", Cohort == "CRC"), 7.1, 3, 3.25, model_growth_rate_gene_cohort$p.value[11], model_growth_rate_gene_cohort$p.stars[11], show_values = FALSE, hide_ns = FALSE)) +
    # do.call(geom_signif, default_sig_data(df_to_plot %>% filter(Gene == "PPM1D", Cohort == "CRC"), 7.1, 3, 3.25, model_growth_rate_gene_cohort$p.value[12], model_growth_rate_gene_cohort$p.stars[12], show_values = FALSE, hide_ns = FALSE)) +
    # do.call(geom_signif, default_sig_data(df_to_plot %>% filter(Gene == "CHEK2", Cohort == "SCLC"), 7.1, 3, 3.25, model_growth_rate_gene_cohort$p.value[13], model_growth_rate_gene_cohort$p.stars[13], show_values = FALSE, hide_ns = FALSE)) +
    # do.call(geom_signif, default_sig_data(df_to_plot %>% filter(Gene == "CHEK2", Cohort == "SCLC"), 7.1, 3, 3.25, model_growth_rate_gene_cohort$p.value[14], model_growth_rate_gene_cohort$p.stars[14], show_values = FALSE, hide_ns = FALSE)) +
    # do.call(geom_signif, default_sig_data(df_to_plot %>% filter(Gene == "CHEK2", Cohort == "TNBC"), 7.1, 3, 3.25, model_growth_rate_gene_cohort$p.value[15], model_growth_rate_gene_cohort$p.stars[15], show_values = FALSE, hide_ns = FALSE)) +
    # do.call(geom_signif, default_sig_data(df_to_plot %>% filter(Gene == "CHEK2", Cohort == "TNBC"), 7.1, 3, 3.25, model_growth_rate_gene_cohort$p.value[16], model_growth_rate_gene_cohort$p.stars[16], show_values = FALSE, hide_ns = FALSE)) +
    # do.call(geom_signif, default_sig_data(df_to_plot %>% filter(Gene == "CHEK2", Cohort == "CRC"), 7.1, 3, 3.25, model_growth_rate_gene_cohort$p.value[17], model_growth_rate_gene_cohort$p.stars[17], show_values = FALSE, hide_ns = FALSE)) +
    # do.call(geom_signif, default_sig_data(df_to_plot %>% filter(Gene == "CHEK2", Cohort == "CRC"), 7.1, 3, 3.25, model_growth_rate_gene_cohort$p.value[18], model_growth_rate_gene_cohort$p.stars[18], show_values = FALSE, hide_ns = FALSE))

fig2e
## Warning: Removed 2 rows containing non-finite outside the scale range
## (`stat_signif()`).
## Warning: Removed 2 rows containing non-finite outside the scale range (`stat_signif()`).
## Removed 2 rows containing non-finite outside the scale range (`stat_signif()`).

ggsave(paste0(figures_save_path, "Figure2E.png"), plot = fig2e, width = 20, height = 8, dpi = 300)
## Warning: Removed 2 rows containing non-finite outside the scale range (`stat_signif()`).
## Removed 2 rows containing non-finite outside the scale range (`stat_signif()`).
## Removed 2 rows containing non-finite outside the scale range (`stat_signif()`).

18 Extended Figure

#source("utils/custom_lollipop.R")

# colors.mutation_type <- brewer.pal(8,"Dark2")[1:8]
# names(colors.mutation_type) <- c("frameshift_variant", "inframe_insertion", "splice_region_variant", "inframe_deletion", 'start_lost','stop_gained','missense_variant', "synonymous_variant")

# # g1_tp %>%
# #     filter(Cohort != "Untreated", Gene == "TP53") %>%
# #     mutate(
# #         ID = DeID,
# #         PROTEIN_CHANGE = gene_aachange,
# #         PROTEIN_POS = as.numeric(sub(".*?([0-9]+).*", "\\1", sub(".*_", "", gene_aachange))),
# #         hit = case_when(
# #             Trilaciclib == "Placebo" ~ 1,
# #             Trilaciclib == "Trilaciclib" ~ 2
# #         ),
# #         gene = "TP53",
# #         mutation_type = VariantClass
# #     ) %>%
# #     dplyr::select(PROTEIN_POS) %>%
# #     filter(PROTEIN_POS >= 95 & PROTEIN_POS <= 288)

# prot.info = get_protein_info("TP53")
# ext_fig_tp53_lollipop <- g1_tp %>%
#     filter(Cohort != "Untreated", Gene == "TP53") %>%
#     mutate(
#         ID = DeID,
#         PROTEIN_CHANGE = gene_aachange,
#         PROTEIN_POS = as.numeric(sub(".*?([0-9]+).*", "\\1", sub(".*_", "", gene_aachange))),
#         hit = case_when(
#             Trilaciclib == "Placebo" ~ 1,
#             Trilaciclib == "Trilaciclib" ~ 2
#         ),
#         gene = "TP53",
#         mutation_type = VariantClass
#     ) %>%
#     my_lollipop(dat.genetics.unique=., 
#                 current.gene="TP53", 
#                 protein_domain=prot.info$protein_domain, 
#                 protein_length=prot.info$protein_length, 
#                 cutoff.hotspot = 20,
#                 colors.mutation_type=colors.mutation_type)
# ggsave(paste0(figures_save_path, "ExtendedFigure1A.png"), plot = ext_fig_tp53_lollipop, width = 12, height = 8, dpi = 300)

# prot.info = get_protein_info("PPM1D")
# ext_fig_ppm1d_lollipop <- g1_tp %>%
#     filter(Cohort != "Untreated", Gene == "PPM1D") %>%
#     mutate(
#         ID = DeID,
#         PROTEIN_CHANGE = gene_aachange,
#         PROTEIN_POS = as.numeric(sub(".*?([0-9]+).*", "\\1", sub(".*_", "", gene_aachange))),
#         hit = case_when(
#             Trilaciclib == "Placebo" ~ 1,
#             Trilaciclib == "Trilaciclib" ~ 2
#         ),
#         gene = "PPM1D",
#         mutation_type = VariantClass
#     ) %>%
#     my_lollipop(dat.genetics.unique=., 
#                 current.gene="PPM1D", 
#                 protein_domain=prot.info$protein_domain, 
#                 protein_length=prot.info$protein_length, 
#                 cutoff.hotspot = 20,
#                 colors.mutation_type=colors.mutation_type)
# ggsave(paste0(figures_save_path, "ExtendedFigure1B.png"), plot = ext_fig_ppm1d_lollipop, width = 12, height = 8, dpi = 300)

# prot.info = get_protein_info("CHEK2")
# ext_fig_chek2_lollipop <- g1_tp %>%
#     filter(Cohort != "Untreated", Gene == "CHEK2") %>%
#     mutate(
#         ID = DeID,
#         PROTEIN_CHANGE = gene_aachange,
#         PROTEIN_POS = as.numeric(sub(".*?([0-9]+).*", "\\1", sub(".*_", "", gene_aachange))),
#         hit = case_when(
#             Trilaciclib == "Placebo" ~ 1,
#             Trilaciclib == "Trilaciclib" ~ 2
#         ),
#         gene = "CHEK2",
#         mutation_type = VariantClass
#     ) %>%
#     my_lollipop(dat.genetics.unique=., 
#                 current.gene="CHEK2", 
#                 protein_domain=prot.info$protein_domain, 
#                 protein_length=prot.info$protein_length, 
#                 cutoff.hotspot = 20,
#                 colors.mutation_type=colors.mutation_type)
# ggsave(paste0(figures_save_path, "ExtendedFigure1C.png"), plot = ext_fig_chek2_lollipop, width = 12, height = 8, dpi = 300)

19 Extended Figure 1 Bubble Plot

df_to_plot_ddr <- g1_tp %>% 
    filter(
        case_when(
            Cohort == "Untreated" ~ compare_group == "PrevsPost",
            Cohort == "SCLC" ~ compare_group == "C1D1vsC5D1",
            Cohort == "CRC" ~ compare_group == "C1D1vsMaintenance",
            Cohort == "TNBC" ~ compare_group == "C1D1vsC7D1",
            TRUE ~ TRUE
        ),
        Gene %in% DDR_genes,
        Cohort != "Untreated"
    ) %>%
    mutate(
        VariantClass = case_when(
            VariantClass == "missense_variant" ~ "Missense",
            VariantClass == "frameshift_variant" ~ "Frameshift",
            VariantClass == "inframe_insertion" ~ "Missense",
            VariantClass == "inframe_deletion" ~ "Missense",
            VariantClass == "splice_region_variant" ~ "Splicing",
            VariantClass == "start_lost" ~ "Nonsense",
            VariantClass == "stop_gained" ~ "Nonsense",
            VariantClass == "synonymous_variant" ~ "Synonymous",
            TRUE ~ "Other"
        ),
        VariantClass = factor(VariantClass, levels = c("Missense", "Frameshift", "Splicing", "Nonsense", "Synonymous", "Other"))
    )

ext_fig_tp53_bubble <- df_to_plot_ddr %>%
    filter(growth_rate != "Inf" & growth_rate != "-Inf") %>%
    filter(Gene == "TP53") %>%
    mutate(
        # size_bin = case_when(
        #     CosmicCount < 10 ~ "0-10",
        #     CosmicCount >= 10 & CosmicCount < 50 ~ "10-50",
        #     CosmicCount >= 50 & CosmicCount < 100 ~ "50-100",
        #     CosmicCount >= 100 ~ ">100"
        # ),
        size_bin = case_when(
            heme_cosmic_count < 10 ~ "0-10",
            heme_cosmic_count >= 10 & heme_cosmic_count < 25 ~ "10-25",
            heme_cosmic_count >= 25 & heme_cosmic_count < 50 ~ "25-50",
            heme_cosmic_count >= 50 ~ ">50"
        ),
        #size_bin = factor(size_bin, levels = c("0-10", "10-50", "50-100", ">100")),
        size_bin = factor(size_bin, levels = c("0-10", "10-25", "25-50", ">50"))
    ) %>%
    ggplot(
        aes(
            x = preVAF,
            y = growth_rate,
            color = Cohort,
            size = size_bin,
            shape = VariantClass
        )
    ) +
    geom_point(alpha = 0.7) +
    #scale_size_manual(values = c("0-10"= 6, "10-50"=8, "50-100"= 10, ">100"= 12)) +
    scale_size_manual(values = c("0-10"= 6, "10-25"=8, "25-50"= 10, ">50"= 12)) +
    scale_shape_manual(values = c("Missense" = 16, "Frameshift" = 17, "Splicing" = 18, "Nonsense" = 15, "Synonymous" = 1, "Other" = 2)) +
    panel_theme_basic +
    labs(
        x = "VAF Pre-Treatment",
        y = "Growth Rate",
        title = "TP53",
        color = "Clinical Trial:",
        size = "Heme Cosmic Count:",
        shape = "Mutation Type:"
    ) +
    scale_fill_nejm() +
    scale_color_nejm() +
    scale_x_continuous(trans = scales::pseudo_log_trans(0.001), limit = c(NA, NA),  breaks = c(0, 0.01, 0.05, 0.01, 0.05, 0.1)) +
    scale_y_continuous(trans = scales::pseudo_log_trans(0.001), limit = c(NA, 0.05)) +
    theme(
        legend.position = c(0.60, 0.95),
        legend.box = "verticle",
        legend.title = element_text(size = 18),
        legend.text = element_text(size = 18),
        legend.spacing = unit(0.01, "cm"),
        plot.margin = margin(1, 1, 1, 1, "cm"),
    ) +
    guides(
        color = guide_legend(byrow = TRUE, order = 1, override.aes = list(size = 5)),
        shape = guide_legend(byrow = TRUE, order = 2, override.aes = list(size = 5)),
        size = guide_legend(byrow = TRUE, order = 3, override.aes = list(size = 5))
    )

ggsave(paste0(figures_save_path, "ExtendedFigure1D.png"), plot = ext_fig_tp53_bubble, width = 10, height = 7, dpi = 300)

ext_fig_ppm1d_bubble <- df_to_plot_ddr %>%
    filter(growth_rate != "Inf" & growth_rate != "-Inf") %>%
    filter(Gene == "PPM1D") %>%
    mutate(
        # size_bin = case_when(
        #     CosmicCount < 10 ~ "0-10",
        #     CosmicCount >= 10 & CosmicCount < 50 ~ "10-50",
        #     CosmicCount >= 50 & CosmicCount < 100 ~ "50-100",
        #     CosmicCount >= 100 ~ ">100"
        # ),
        size_bin = case_when(
            heme_cosmic_count < 10 ~ "0-10",
            heme_cosmic_count >= 10 & heme_cosmic_count < 25 ~ "10-25",
            heme_cosmic_count >= 25 & heme_cosmic_count < 50 ~ "25-50",
            heme_cosmic_count >= 50 ~ ">50"
        ),
        #size_bin = factor(size_bin, levels = c("0-10", "10-50", "50-100", ">100")),
        size_bin = factor(size_bin, levels = c("0-10", "10-25", "25-50", ">50"))
    ) %>%
    ggplot(
        aes(
            x = preVAF,
            y = growth_rate,
            color = Cohort,
            size = size_bin,
            shape = VariantClass
        )
    ) +
    geom_point(alpha = 0.7) +
    #scale_size_manual(values = c("0-10"= 6, "10-50"=8, "50-100"= 10, ">100"= 12)) +
    scale_size_manual(values = c("0-10"= 6, "10-25"=8, "25-50"= 10, ">50"= 12)) +
    scale_shape_manual(values = c("Missense" = 16, "Frameshift" = 17, "Splicing" = 18, "Nonsense" = 15, "Synonymous" = 1, "Other" = 2)) +
    panel_theme_basic +
    labs(
        x = "VAF Pre-Treatment",
        y = "Growth Rate",
        title = "PPM1D",
        color = "Clinical Trial:",
        size = "Heme Cosmic Count:",
        shape = "Mutation Type:"
    ) +
    scale_fill_nejm() +
    scale_color_nejm() +
    scale_x_continuous(trans = scales::pseudo_log_trans(0.001), limit = c(NA, NA),  breaks = c(0, 0.01, 0.05, 0.01, 0.05, 0.1)) +
    scale_y_continuous(trans = scales::pseudo_log_trans(0.001), limit = c(NA, 0.05)) +
    theme(
        legend.position = "none",
        plot.margin = margin(1, 1, 1, 1, "cm"),
    )
ggsave(paste0(figures_save_path, "ExtendedFigure1E.png"), plot = ext_fig_ppm1d_bubble, width = 10, height = 7, dpi = 300)

ext_fig_chek2_bubble <- df_to_plot_ddr %>%
    filter(growth_rate != "Inf" & growth_rate != "-Inf") %>%
    filter(Gene == "CHEK2") %>%
    mutate(
        # size_bin = case_when(
        #     CosmicCount < 10 ~ "0-10",
        #     CosmicCount >= 10 & CosmicCount < 50 ~ "10-50",
        #     CosmicCount >= 50 & CosmicCount < 100 ~ "50-100",
        #     CosmicCount >= 100 ~ ">100"
        # ),
        size_bin = case_when(
            heme_cosmic_count < 10 ~ "0-10",
            heme_cosmic_count >= 10 & heme_cosmic_count < 25 ~ "10-25",
            heme_cosmic_count >= 25 & heme_cosmic_count < 50 ~ "25-50",
            heme_cosmic_count >= 50 ~ ">50"
        ),
        #size_bin = factor(size_bin, levels = c("0-10", "10-50", "50-100", ">100")),
        size_bin = factor(size_bin, levels = c("0-10", "10-25", "25-50", ">50"))
    ) %>%
    ggplot(
        aes(
            x = preVAF,
            y = growth_rate,
            color = Cohort,
            size = size_bin,
            shape = VariantClass
        )
    ) +
    geom_point(alpha = 0.7) +
    #scale_size_manual(values = c("0-10"= 6, "10-50"=8, "50-100"= 10, ">100"= 12)) +
    scale_size_manual(values = c("0-10"= 6, "10-25"=8, "25-50"= 10, ">50"= 12)) +
    scale_shape_manual(values = c("Missense" = 16, "Frameshift" = 17, "Splicing" = 18, "Nonsense" = 15, "Synonymous" = 1, "Other" = 2)) +
    panel_theme_basic +
    labs(
        x = "VAF Pre-Treatment",
        y = "Growth Rate",
        title = "CHEK2",
        color = "Clinical Trial:",
        size = "Heme Cosmic Count:",
        shape = "Mutation Type:"
    ) +
    scale_fill_nejm() +
    scale_color_nejm() +
    scale_x_continuous(trans = scales::pseudo_log_trans(0.001), limit = c(NA, NA),  breaks = c(0, 0.01, 0.05, 0.01, 0.05, 0.1)) +
    scale_y_continuous(trans = scales::pseudo_log_trans(0.001), limit = c(NA, 0.05)) +
    theme(
        legend.position = "none",
        plot.margin = margin(1, 1, 1, 1, "cm"),
    )
ggsave(paste0(figures_save_path, "ExtendedFigure1F.png"), plot = ext_fig_chek2_bubble, width = 10, height = 7, dpi = 300)
ggsave(paste0(figures_save_path, "ExtendedFigure1G.png"), plot = ext_fig_tp53_missense, width = 10, height = 7, dpi = 300)

20 Sensitivity Analysis - Combined

df_to_plot <- g1_tp %>% 
    filter(
        case_when(
            Cohort == "Untreated" ~ compare_group == "PrevsPost",
            Cohort == "SCLC" ~ compare_group == "C1D1vsC5D1",
            Cohort == "CRC" ~ compare_group == "C1D1vsMaintenance",
            Cohort == "TNBC" ~ compare_group == "C1D1vsC7D1",
            TRUE ~ TRUE
        )
    )

model_growth_rate_gene <- map_dfr(c("TP53", "PPM1D", "CHEK2"), function(gene) {
    print(gene)
    model_growth_rate_gene <- rbind(
        glm(
            formula = growth_rate ~ Trilaciclib + Age + Sex + Race,
            data = df_to_plot %>%
                filter(Gene == gene) %>%
                filter(growth_rate != "Inf" & growth_rate != "-Inf")
        ) %>% sjPlot::get_model_data(type = "est") %>%
            mutate(
                term = case_when(
                    term == "TrilaciclibTrilaciclib" ~ "UntreatedVSTrilaciclib",
                    term == "TrilaciclibPlacebo" ~ "UntreatedVSPlacebo",
                    TRUE ~ term
                ),
                gene = gene
            ),
        glm(
            formula = growth_rate ~ Trilaciclib + Cohort + Age + Sex + Race,
            data = df_to_plot %>%
                filter(Gene == gene) %>%
                filter(Trilaciclib != "Untreated") %>%
                filter(growth_rate != "Inf" & growth_rate != "-Inf")
        ) %>% sjPlot::get_model_data(type = "est") %>%
            filter(!grepl("Cohort", term)) %>%
            mutate(
                term = case_when(
                    term == "TrilaciclibPlacebo" ~ "TrilaciclibVSPlacebo",
                    TRUE ~ term
                ),
                gene = gene
            )
    )
    model_growth_rate_gene
})
## [1] "TP53"
## [1] "PPM1D"
## [1] "CHEK2"
model_growth_rate_gene
term estimate std.error conf.level conf.low conf.high statistic df.error p.value p.stars p.label group xpos xmin xmax gene
UntreatedVSTrilaciclib 0.0034097 0.0019805 0.95 -0.0004720 0.0072914 1.7216448 136 0.0851339 0.00 pos 5 4.825 5.175 TP53
UntreatedVSPlacebo 0.0059762 0.0019604 0.95 0.0021338 0.0098185 3.0484061 136 0.0023006 ** 0.01 ** pos 4 3.825 4.175 TP53
Age 0.0001012 0.0000451 0.95 0.0000127 0.0001897 2.2420769 136 0.0249564 * 0.00 * pos 3 2.825 3.175 TP53
SexFemale 0.0009292 0.0011052 0.95 -0.0012369 0.0030954 0.8408001 136 0.4004599 0.00 pos 2 1.825 2.175 TP53
RaceNon-White 0.0006894 0.0015510 0.95 -0.0023505 0.0037293 0.4445030 136 0.6566790 0.00 pos 1 0.825 1.175 TP53
TrilaciclibVSPlacebo 0.0024756 0.0012320 0.95 0.0000609 0.0048903 2.0094296 121 0.0444916 * 0.00 * pos 6 5.825 6.175 TP53
Age 0.0001129 0.0000520 0.95 0.0000109 0.0002148 2.1695095 121 0.0300440 * 0.00 * pos 3 2.825 3.175 TP53
SexFemale -0.0002078 0.0016469 0.95 -0.0034358 0.0030201 -0.1261793 121 0.8995900 -0.00 neg 2 1.825 2.175 TP53
RaceNon-White 0.0016137 0.0016989 0.95 -0.0017161 0.0049436 0.9498649 121 0.3421809 0.00 pos 1 0.825 1.175 TP53
UntreatedVSTrilaciclib 0.0086118 0.0033611 0.95 0.0020242 0.0151994 2.5622008 169 0.0104011 * 0.01 * pos 5 4.825 5.175 PPM1D
UntreatedVSPlacebo 0.0146038 0.0033470 0.95 0.0080438 0.0211637 4.3632588 169 0.0000128 *** 0.01 *** pos 4 3.825 4.175 PPM1D
Age -0.0000389 0.0000588 0.95 -0.0001542 0.0000763 -0.6619998 169 0.5079713 -0.00 neg 3 2.825 3.175 PPM1D
SexFemale 0.0017895 0.0012853 0.95 -0.0007297 0.0043088 1.3922680 169 0.1638412 0.00 pos 2 1.825 2.175 PPM1D
RaceNon-White -0.0009448 0.0017225 0.95 -0.0043209 0.0024313 -0.5485064 169 0.5833442 -0.00 neg 1 0.825 1.175 PPM1D
TrilaciclibVSPlacebo 0.0058463 0.0013352 0.95 0.0032294 0.0084631 4.3787136 161 0.0000119 *** 0.01 *** pos 6 5.825 6.175 PPM1D
Age -0.0000812 0.0000646 0.95 -0.0002077 0.0000453 -1.2575795 161 0.2085439 -0.00 neg 3 2.825 3.175 PPM1D
SexFemale -0.0008648 0.0018065 0.95 -0.0044054 0.0026757 -0.4787521 161 0.6321150 -0.00 neg 2 1.825 2.175 PPM1D
RaceNon-White -0.0007589 0.0018075 0.95 -0.0043015 0.0027837 -0.4198567 161 0.6745901 -0.00 neg 1 0.825 1.175 PPM1D
UntreatedVSTrilaciclib 0.0051875 0.0031902 0.95 -0.0010651 0.0114402 1.6260890 65 0.1039307 0.01 pos 5 4.825 5.175 CHEK2
UntreatedVSPlacebo 0.0079558 0.0029887 0.95 0.0020981 0.0138134 2.6619786 65 0.0077683 ** 0.01 ** pos 4 3.825 4.175 CHEK2
Age 0.0000633 0.0000804 0.95 -0.0000943 0.0002209 0.7868569 65 0.4313657 0.00 pos 3 2.825 3.175 CHEK2
SexFemale -0.0007431 0.0016833 0.95 -0.0040423 0.0025561 -0.4414571 65 0.6588821 -0.00 neg 2 1.825 2.175 CHEK2
RaceNon-White 0.0005244 0.0027095 0.95 -0.0047861 0.0058350 0.1935500 65 0.8465283 0.00 pos 1 0.825 1.175 CHEK2
TrilaciclibVSPlacebo 0.0028331 0.0019012 0.95 -0.0008931 0.0065593 1.4901985 57 0.1361720 0.00 pos 6 5.825 6.175 CHEK2
Age 0.0000619 0.0000935 0.95 -0.0001213 0.0002451 0.6626867 57 0.5075312 0.00 pos 3 2.825 3.175 CHEK2
SexFemale -0.0012177 0.0026711 0.95 -0.0064530 0.0040176 -0.4558627 57 0.6484887 -0.00 neg 2 1.825 2.175 CHEK2
RaceNon-White 0.0013095 0.0028785 0.95 -0.0043323 0.0069513 0.4549289 57 0.6491604 0.00 pos 1 0.825 1.175 CHEK2
supp_table_4 <- rbind(
    glm(
        formula = growth_rate ~ Trilaciclib + Age + Sex + Race,
        data = df_to_plot %>%
            filter(Gene_Class == "DTA") %>%
                filter(growth_rate != "Inf" & growth_rate != "-Inf")
    ) %>% sjPlot::get_model_data(type = "est") %>%
    mutate(
        term = case_when(
            term == "TrilaciclibTrilaciclib" ~ "UntreatedVSTrilaciclib", 
            term == "TrilaciclibPlacebo" ~ "UntreatedVSPlacebo",
            TRUE ~ term
        ),
        Gene_Class = "DTA"
    ),
    glm(
        formula = growth_rate ~ Trilaciclib + Cohort + Age + Sex + Race,
        data = df_to_plot %>%
            filter(Trilaciclib != "Untreated") %>%
            mutate(Trilaciclib = factor(Trilaciclib, levels = c("Trilaciclib", "Placebo"))) %>%
            filter(Gene_Class == "DTA") %>%
            filter(growth_rate != "Inf" & growth_rate != "-Inf")
    ) %>%
    sjPlot::get_model_data(type = "est") %>%
    filter(!grepl("Cohort", term)) %>%
    mutate(
        term = case_when(
            term == "TrilaciclibPlacebo" ~ "TrilaciclibVSPlacebo",
            TRUE ~ term
        ),
        Gene_Class = "DTA"
    ),
    glm(
        formula = growth_rate ~ Trilaciclib + Age + Sex + Race,
        data = df_to_plot %>%
            filter(Gene_Class == "DDR") %>%
                filter(growth_rate != "Inf" & growth_rate != "-Inf")
    ) %>% sjPlot::get_model_data(type = "est") %>%
    mutate(
        term = case_when(
            term == "TrilaciclibTrilaciclib" ~ "UntreatedVSTrilaciclib", 
            term == "TrilaciclibPlacebo" ~ "UntreatedVSPlacebo",
            TRUE ~ term
        ),
        Gene_Class = "DDR"
    ),
    glm(
        formula = growth_rate ~ Trilaciclib + Cohort + Age + Sex + Race,
        data = df_to_plot %>%
            filter(Trilaciclib != "Untreated") %>%
            mutate(Trilaciclib = factor(Trilaciclib, levels = c("Trilaciclib", "Placebo"))) %>%
            filter(Gene_Class == "DDR") %>%
            filter(growth_rate != "Inf" & growth_rate != "-Inf")
    ) %>%
    sjPlot::get_model_data(type = "est") %>%
    filter(!grepl("Cohort", term)) %>%
    mutate(
        term = case_when(
            term == "TrilaciclibPlacebo" ~ "TrilaciclibVSPlacebo",
            TRUE ~ term
        ),
        Gene_Class = "DDR"
    )
)
# Supp Table 4
supp_table_4
term estimate std.error conf.level conf.low conf.high statistic df.error p.value p.stars p.label group xpos xmin xmax Gene_Class
UntreatedVSTrilaciclib -0.0004193 0.0008560 0.95 -0.0020970 0.0012584 -0.4898279 748 0.6242557 -0.00 neg 5 4.825 5.175 DTA
UntreatedVSPlacebo 0.0004505 0.0008192 0.95 -0.0011551 0.0020560 0.5498981 748 0.5823893 0.00 pos 4 3.825 4.175 DTA
Age 0.0000097 0.0000282 0.95 -0.0000456 0.0000650 0.3437133 748 0.7310619 0.00 pos 3 2.825 3.175 DTA
SexFemale -0.0010127 0.0005726 0.95 -0.0021350 0.0001097 -1.7684285 748 0.0769893 -0.00 neg 2 1.825 2.175 DTA
RaceNon-White -0.0005097 0.0007889 0.95 -0.0020560 0.0010366 -0.6460652 748 0.5182371 -0.00 neg 1 0.825 1.175 DTA
TrilaciclibVSPlacebo 0.0009428 0.0006610 0.95 -0.0003526 0.0022383 1.4264390 614 0.1537417 0.00 pos 6 5.825 6.175 DTA
Age 0.0000076 0.0000325 0.95 -0.0000561 0.0000714 0.2347762 614 0.8143824 0.00 pos 3 2.825 3.175 DTA
SexFemale -0.0012016 0.0007347 0.95 -0.0026416 0.0002384 -1.6355022 614 0.1019438 -0.00 neg 2 1.825 2.175 DTA
RaceNon-White -0.0009631 0.0008755 0.95 -0.0026790 0.0007528 -1.1000411 614 0.2713142 -0.00 neg 1 0.825 1.175 DTA
UntreatedVSTrilaciclib 0.0061118 0.0016932 0.95 0.0027932 0.0094303 3.6096670 382 0.0003066 *** 0.01 *** pos 5 4.825 5.175 DDR
UntreatedVSPlacebo 0.0101728 0.0016769 0.95 0.0068861 0.0134595 6.0663505 382 0.0000000 *** 0.01 *** pos 4 3.825 4.175 DDR
Age 0.0000565 0.0000363 0.95 -0.0000147 0.0001276 1.5557795 382 0.1197605 0.00 pos 3 2.825 3.175 DDR
SexFemale 0.0009388 0.0008254 0.95 -0.0006790 0.0025567 1.1373626 382 0.2553867 0.00 pos 2 1.825 2.175 DDR
RaceNon-White 0.0006464 0.0011606 0.95 -0.0016283 0.0029211 0.5569530 382 0.5775595 0.00 pos 1 0.825 1.175 DDR
TrilaciclibVSPlacebo 0.0038896 0.0008921 0.95 0.0021411 0.0056380 4.3600833 353 0.0000130 *** 0.00 *** pos 6 5.825 6.175 DDR
Age 0.0000419 0.0000408 0.95 -0.0000381 0.0001220 1.0261655 353 0.3048136 0.00 pos 3 2.825 3.175 DDR
SexFemale -0.0003616 0.0012065 0.95 -0.0027264 0.0020031 -0.2997444 353 0.7643721 -0.00 neg 2 1.825 2.175 DDR
RaceNon-White 0.0013765 0.0012454 0.95 -0.0010644 0.0038174 1.1052767 353 0.2690397 0.00 pos 1 0.825 1.175 DDR
supp_table_5 <- rbind(
    glm(
        formula = growth_rate ~ Trilaciclib + Age + Sex + Race + SmokingStatus,
        data = df_to_plot %>%
            filter(Cohort != "CRC") %>%
            mutate(Cohort = factor(Cohort, levels = c("Untreated", "SCLC", "TNBC"))) %>%
            filter(Gene_Class == "DTA") %>%
            filter(growth_rate != "Inf" & growth_rate != "-Inf")
    ) %>% sjPlot::get_model_data(type = "est") %>%
        mutate(
            term = case_when(
                term == "TrilaciclibTrilaciclib" ~ "UntreatedVSTrilaciclib",
                term == "TrilaciclibPlacebo" ~ "UntreatedVSPlacebo",
                TRUE ~ term
            ),
            Gene_Class = "DTA"
        ),
    glm(
        formula = growth_rate ~ Trilaciclib + Cohort + Age + Sex + Race + SmokingStatus,
        data = df_to_plot %>%
            filter(Cohort != "CRC") %>%
            mutate(Cohort = factor(Cohort, levels = c("SCLC", "TNBC"))) %>%
            filter(Trilaciclib != "Untreated") %>%
            mutate(Trilaciclib = factor(Trilaciclib, levels = c("Trilaciclib", "Placebo"))) %>%
            filter(Gene_Class == "DTA") %>%
            filter(growth_rate != "Inf" & growth_rate != "-Inf")
    ) %>%
        sjPlot::get_model_data(type = "est") %>%
        filter(!grepl("Cohort", term)) %>%
        mutate(
            term = case_when(
                term == "TrilaciclibPlacebo" ~ "TrilaciclibVSPlacebo",
                TRUE ~ term
            ),
            Gene_Class = "DTA"
        ),
    glm(
        formula = growth_rate ~ Trilaciclib + Age + Sex + Race + SmokingStatus,
        data = df_to_plot %>%
            filter(Cohort != "CRC") %>%
            mutate(Cohort = factor(Cohort, levels = c("Untreated", "SCLC", "TNBC"))) %>%
            filter(Gene_Class == "DDR") %>%
            filter(growth_rate != "Inf" & growth_rate != "-Inf")
    ) %>% sjPlot::get_model_data(type = "est") %>%
        mutate(
            term = case_when(
                term == "TrilaciclibTrilaciclib" ~ "UntreatedVSTrilaciclib",
                term == "TrilaciclibPlacebo" ~ "UntreatedVSPlacebo",
                TRUE ~ term
            ),
            Gene_Class = "DDR"
        ),
    glm(
        formula = growth_rate ~ Trilaciclib + Cohort + Age + Sex + Race + SmokingStatus,
        data = df_to_plot %>%
            filter(Cohort != "CRC") %>%
            mutate(Cohort = factor(Cohort, levels = c("SCLC", "TNBC"))) %>%
            filter(Trilaciclib != "Untreated") %>%
            mutate(Trilaciclib = factor(Trilaciclib, levels = c("Trilaciclib", "Placebo"))) %>%
            filter(Gene_Class == "DDR") %>%
            filter(growth_rate != "Inf" & growth_rate != "-Inf")
    ) %>%
        sjPlot::get_model_data(type = "est") %>%
        filter(!grepl("Cohort", term)) %>%
        mutate(
            term = case_when(
                term == "TrilaciclibPlacebo" ~ "TrilaciclibVSPlacebo",
                TRUE ~ term
            ),
            Gene_Class = "DDR"
        )
)
# Supp Table 5
supp_table_5
term estimate std.error conf.level conf.low conf.high statistic df.error p.value p.stars p.label group xpos xmin xmax Gene_Class
UntreatedVSTrilaciclib -0.0012785 0.0012805 0.95 -0.0037883 0.0012312 -0.9984700 331 0.3180515 -0.00 neg 8 7.825 8.175 DTA
UntreatedVSPlacebo -0.0007904 0.0012115 0.95 -0.0031649 0.0015842 -0.6523508 331 0.5141749 -0.00 neg 7 6.825 7.175 DTA
Age 0.0000070 0.0000455 0.95 -0.0000821 0.0000961 0.1544334 331 0.8772680 0.00 pos 6 5.825 6.175 DTA
SexFemale -0.0001274 0.0009925 0.95 -0.0020727 0.0018179 -0.1283547 331 0.8978683 -0.00 neg 5 4.825 5.175 DTA
RaceNon-White 0.0004122 0.0013223 0.95 -0.0021793 0.0030038 0.3117728 331 0.7552132 0.00 pos 4 3.825 4.175 DTA
SmokingStatusFormer 0.0014399 0.0012494 0.95 -0.0010089 0.0038888 1.1524655 331 0.2491298 0.00 pos 3 2.825 3.175 DTA
SmokingStatusCurrent -0.0018238 0.0015022 0.95 -0.0047681 0.0011204 -1.2141062 331 0.2247072 -0.00 neg 2 1.825 2.175 DTA
SmokingStatusUnknown -0.0023494 0.0039967 0.95 -0.0101827 0.0054840 -0.5878253 331 0.5566496 -0.00 neg 1 0.825 1.175 DTA
TrilaciclibVSPlacebo 0.0010339 0.0012377 0.95 -0.0013920 0.0034598 0.8353418 199 0.4035253 0.00 pos 7 6.825 7.175 DTA
Age -0.0000464 0.0000600 0.95 -0.0001640 0.0000713 -0.7722100 199 0.4399901 -0.00 neg 5 4.825 5.175 DTA
SexFemale -0.0007112 0.0018435 0.95 -0.0043243 0.0029019 -0.3857822 199 0.6996580 -0.00 neg 4 3.825 4.175 DTA
RaceNon-White -0.0011829 0.0018792 0.95 -0.0048661 0.0025002 -0.6294829 199 0.5290329 -0.00 neg 3 2.825 3.175 DTA
SmokingStatusFormer 0.0013803 0.0017165 0.95 -0.0019839 0.0047446 0.8041807 199 0.4212926 0.00 pos 2 1.825 2.175 DTA
SmokingStatusCurrent -0.0022942 0.0021815 0.95 -0.0065699 0.0019815 -1.0516701 199 0.2929509 -0.00 neg 1 0.825 1.175 DTA
UntreatedVSTrilaciclib 0.0036098 0.0022159 0.95 -0.0007334 0.0079529 1.6289910 196 0.1033149 0.00 pos 8 7.825 8.175 DDR
UntreatedVSPlacebo 0.0072309 0.0020215 0.95 0.0032688 0.0111931 3.5769444 196 0.0003476 *** 0.01 *** pos 7 6.825 7.175 DDR
Age -0.0000546 0.0000509 0.95 -0.0001543 0.0000451 -1.0740104 196 0.2828180 -0.00 neg 6 5.825 6.175 DDR
SexFemale 0.0031637 0.0015352 0.95 0.0001547 0.0061727 2.0607606 196 0.0393259 * 0.00 * pos 5 4.825 5.175 DDR
RaceNon-White 0.0024734 0.0016373 0.95 -0.0007356 0.0056824 1.5106698 196 0.1308726 0.00 pos 4 3.825 4.175 DDR
SmokingStatusFormer 0.0027653 0.0013955 0.95 0.0000303 0.0055004 1.9816716 196 0.0475160 * 0.00 * pos 3 2.825 3.175 DDR
SmokingStatusCurrent 0.0067009 0.0019253 0.95 0.0029273 0.0104744 3.4804055 196 0.0005007 *** 0.01 *** pos 2 1.825 2.175 DDR
SmokingStatusUnknown 0.0018030 0.0059519 0.95 -0.0098625 0.0134685 0.3029309 196 0.7619425 0.00 pos 1 0.825 1.175 DDR
TrilaciclibVSPlacebo 0.0026274 0.0014808 0.95 -0.0002750 0.0055298 1.7742285 169 0.0760254 0.00 pos 7 6.825 7.175 DDR
Age -0.0000373 0.0000578 0.95 -0.0001506 0.0000760 -0.6455768 169 0.5185534 -0.00 neg 5 4.825 5.175 DDR
SexFemale 0.0056182 0.0020333 0.95 0.0016329 0.0096034 2.7630620 169 0.0057262 ** 0.01 ** pos 4 3.825 4.175 DDR
RaceNon-White 0.0038061 0.0019488 0.95 -0.0000134 0.0076257 1.9530753 169 0.0508107 0.00 pos 3 2.825 3.175 DDR
SmokingStatusFormer -0.0000341 0.0017543 0.95 -0.0034725 0.0034042 -0.0194595 169 0.9844745 -0.00 neg 2 1.825 2.175 DDR
SmokingStatusCurrent 0.0038678 0.0023119 0.95 -0.0006635 0.0083990 1.6729869 169 0.0943299 0.00 pos 1 0.825 1.175 DDR
model_growth_rate_class <- rbind(
    glm(
        formula = growth_rate ~ Trilaciclib + Age + Sex + Race + Ethnicity,
        data = df_to_plot %>%
            filter(Cohort != "CRC") %>%
            mutate(Cohort = factor(Cohort, levels = c("Untreated", "SCLC", "TNBC"))) %>%
            filter(Gene_Class == "DTA") %>% 
            filter(growth_rate != "Inf" & growth_rate != "-Inf")
    ) %>% sjPlot::get_model_data(type = "est") %>%
    mutate(
        term = case_when(
            term == "TrilaciclibTrilaciclib" ~ "UntreatedVSTrilaciclib", 
            term == "TrilaciclibPlacebo" ~ "UntreatedVSPlacebo",
            TRUE ~ term
        ),
        Gene_Class = "DTA"
    ),
    glm(
        formula = growth_rate ~ Trilaciclib + Age + Sex + Race + Ethnicity + SmokingStatus + change_in_prop_MONO + change_in_prop_LYM + change_in_prop_GRAN,
        data = df_to_plot %>%
            filter(Cohort != "CRC") %>%
            mutate(Cohort = factor(Cohort, levels = c("SCLC", "TNBC"))) %>%
            filter(Gene_Class == "DTA") %>% 
            filter(Trilaciclib != "Untreated") %>%
            mutate(Trilaciclib = factor(Trilaciclib, levels = c("Trilaciclib", "Placebo"))) %>%
            filter(growth_rate != "Inf" & growth_rate != "-Inf")
    ) %>% sjPlot::get_model_data(type = "est") %>%
    mutate(
        term = case_when(
            term == "TrilaciclibPlacebo" ~ "TrilaciclibVSPlacebo",
            TRUE ~ term
        ),
        Gene_Class = "DTA"
    ),
    glm(
        formula = growth_rate ~ Trilaciclib + Age + Sex + Race + Ethnicity,
        data = df_to_plot %>%
            filter(Cohort != "CRC") %>%
            mutate(Cohort = factor(Cohort, levels = c("Untreated", "SCLC", "TNBC"))) %>%
            filter(Gene_Class == "DDR") %>%
            filter(growth_rate != "Inf" & growth_rate != "-Inf")
    ) %>% sjPlot::get_model_data(type = "est") %>%
    mutate(
        term = case_when(
            term == "TrilaciclibTrilaciclib" ~ "UntreatedVSTrilaciclib",
            term == "TrilaciclibPlacebo" ~ "UntreatedVSPlacebo",
            TRUE ~ term
        ),
        Gene_Class = "DDR"
    ),
    glm(
        formula = growth_rate ~ Trilaciclib  + Age + Sex + Race + Ethnicity + SmokingStatus + change_in_prop_MONO + change_in_prop_LYM + change_in_prop_GRAN,
        data = df_to_plot %>%
            filter(Cohort != "CRC") %>%
            mutate(Cohort = factor(Cohort, levels = c("SCLC", "TNBC"))) %>%
            filter(Gene_Class == "DDR") %>%
            filter(Trilaciclib != "Untreated") %>%
            mutate(Trilaciclib = factor(Trilaciclib, levels = c("Trilaciclib", "Placebo"))) %>%
            filter(growth_rate != "Inf" & growth_rate != "-Inf")
    ) %>% sjPlot::get_model_data(type = "est") %>%
    mutate(
         term = case_when(
            term == "TrilaciclibPlacebo" ~ "TrilaciclibVSPlacebo",
            TRUE ~ term
        ),
        Gene_Class = "DDR"
    )
)

model_growth_rate_class <- rbind(
    glm(
        formula = growth_rate ~ Trilaciclib + Age + Sex + Race + Ethnicity,
        data = df_to_plot %>%
            filter(Gene_Class == "DTA") %>% 
            filter(growth_rate != "Inf" & growth_rate != "-Inf")
    ) %>% sjPlot::get_model_data(type = "est") %>%
    mutate(
        term = case_when(
            term == "TrilaciclibTrilaciclib" ~ "UntreatedVSTrilaciclib", 
            term == "TrilaciclibPlacebo" ~ "UntreatedVSPlacebo",
            TRUE ~ term
        ),
        Gene_Class = "DTA"
    ),
    glm(
        formula = growth_rate ~ Trilaciclib + Age + Sex + Race + Ethnicity + change_in_prop_MONO + change_in_prop_LYM + change_in_prop_GRAN,
        data = df_to_plot %>%
            filter(Gene_Class == "DTA") %>% 
            filter(Trilaciclib != "Untreated") %>%
            mutate(Trilaciclib = factor(Trilaciclib, levels = c("Trilaciclib", "Placebo"))) %>%
            filter(growth_rate != "Inf" & growth_rate != "-Inf")
    ) %>% sjPlot::get_model_data(type = "est") %>%
    mutate(
        term = case_when(
            term == "TrilaciclibPlacebo" ~ "TrilaciclibVSPlacebo",
            TRUE ~ term
        ),
        Gene_Class = "DTA"
    ),
    glm(
        formula = growth_rate ~ Trilaciclib + Age + Sex + Race + Ethnicity,
        data = df_to_plot %>%
            filter(Gene_Class == "DDR") %>%
            filter(growth_rate != "Inf" & growth_rate != "-Inf")
    ) %>% sjPlot::get_model_data(type = "est") %>%
    mutate(
        term = case_when(
            term == "TrilaciclibTrilaciclib" ~ "UntreatedVSTrilaciclib",
            term == "TrilaciclibPlacebo" ~ "UntreatedVSPlacebo",
            TRUE ~ term
        ),
        Gene_Class = "DDR"
    ),
    glm(
        formula = growth_rate ~ Trilaciclib + Age + Sex + Race + Ethnicity + change_in_prop_MONO + change_in_prop_LYM + change_in_prop_GRAN,
        data = df_to_plot %>%
            filter(Gene_Class == "DDR") %>%
            filter(Trilaciclib != "Untreated") %>%
            mutate(Trilaciclib = factor(Trilaciclib, levels = c("Trilaciclib", "Placebo"))) %>%
            filter(growth_rate != "Inf" & growth_rate != "-Inf")
    ) %>% sjPlot::get_model_data(type = "est") %>%
    mutate(
         term = case_when(
            term == "TrilaciclibPlacebo" ~ "TrilaciclibVSPlacebo",
            TRUE ~ term
        ),
        Gene_Class = "DDR"
    )
)

21 Extra TP53 Analysis

df_to_plot <- g1_tp %>% 
    filter(
        case_when(
            Cohort == "Untreated" ~ compare_group == "PrevsPost",
            Cohort == "SCLC" ~ compare_group == "C1D1vsC5D1",
            Cohort == "CRC" ~ compare_group == "C1D1vsMaintenance",
            Cohort == "TNBC" ~ compare_group == "C1D1vsC7D1",
            TRUE ~ TRUE
        )
    )

model_growth_rate_cosmic_count_tp53 <- glm(
        formula = growth_rate ~ Trilaciclib + preVAF + CosmicCount + heme_cosmic_count + Cohort + Age + Sex + Race,
        data = df_to_plot %>%
            filter(Gene == "TP53") %>%
            filter(Trilaciclib != "Untreated") %>%
            filter(growth_rate != "Inf" & growth_rate != "-Inf")
    ) %>% sjPlot::get_model_data(type = "est") %>%
    mutate(
        term = case_when(
            term == "TrilaciclibPlacebo" ~ "TrilaciclibVSPlacebo",
            term == "CohortTNBC" ~ "SCLCvsTNBC",
            term == "CohortCRC" ~ "SCLCvsCRC",
            TRUE ~ term
        )
    )
model_growth_rate_cosmic_count_tp53
term estimate std.error conf.level conf.low conf.high statistic df.error p.value p.stars p.label group xpos xmin xmax
TrilaciclibVSPlacebo 0.0024906 0.0012385 0.95 0.0000632 0.0049180 2.0109850 118 0.0443270 * 0.00 * pos 9 8.825 9.175
preVAF -0.0394023 0.0324058 0.95 -0.1029165 0.0241120 -1.2159009 118 0.2240227 -0.04 neg 8 7.825 8.175
CosmicCount -0.0000002 0.0000036 0.95 -0.0000073 0.0000069 -0.0560298 118 0.9553180 -0.00 neg 7 6.825 7.175
heme_cosmic_count 0.0000319 0.0000706 0.95 -0.0001065 0.0001704 0.4519045 118 0.6513378 0.00 pos 6 5.825 6.175
SCLCvsTNBC -0.0012031 0.0021011 0.95 -0.0053212 0.0029151 -0.5725839 118 0.5669265 -0.00 neg 5 4.825 5.175
SCLCvsCRC -0.0028106 0.0018803 0.95 -0.0064959 0.0008748 -1.4947404 118 0.1349822 -0.00 neg 4 3.825 4.175
Age 0.0001207 0.0000528 0.95 0.0000173 0.0002242 2.2873112 118 0.0221777 * 0.00 * pos 3 2.825 3.175
SexFemale -0.0003225 0.0016540 0.95 -0.0035643 0.0029193 -0.1949850 118 0.8454047 -0.00 neg 2 1.825 2.175
RaceNon-White 0.0013892 0.0017105 0.95 -0.0019632 0.0047416 0.8121886 118 0.4166834 0.00 pos 1 0.825 1.175
model_growth_rate_cosmic_count_ppm1d <- glm(
        formula = growth_rate ~ Trilaciclib + preVAF + CosmicCount + heme_cosmic_count + Cohort + Age + Sex + Race,
        data = df_to_plot %>%
            filter(Gene == "PPM1D") %>%
            filter(Trilaciclib != "Untreated") %>%
            filter(growth_rate != "Inf" & growth_rate != "-Inf")
    ) %>% sjPlot::get_model_data(type = "est") %>%
    mutate(
        term = case_when(
            term == "TrilaciclibPlacebo" ~ "TrilaciclibVSPlacebo",
            term == "CohortTNBC" ~ "SCLCvsTNBC",
            term == "CohortCRC" ~ "SCLCvsCRC",
            TRUE ~ term
        )
    )
model_growth_rate_cosmic_count_ppm1d
term estimate std.error conf.level conf.low conf.high statistic df.error p.value p.stars p.label group xpos xmin xmax
TrilaciclibVSPlacebo 0.0057048 0.0013198 0.95 0.0031180 0.0082915 4.3224943 158 0.0000154 *** 0.01 *** pos 9 8.825 9.175
preVAF -0.0358074 0.0469521 0.95 -0.1278319 0.0562171 -0.7626355 158 0.4456808 -0.04 neg 8 7.825 8.175
CosmicCount -0.0000921 0.0002094 0.95 -0.0005026 0.0003184 -0.4396938 158 0.6601589 -0.00 neg 7 6.825 7.175
heme_cosmic_count -0.0006480 0.0005733 0.95 -0.0017717 0.0004756 -1.1303359 158 0.2583347 -0.00 neg 6 5.825 6.175
SCLCvsTNBC -0.0029524 0.0021387 0.95 -0.0071442 0.0012395 -1.3804302 158 0.1674542 -0.00 neg 5 4.825 5.175
SCLCvsCRC -0.0055207 0.0019564 0.95 -0.0093551 -0.0016862 -2.8218682 158 0.0047745 ** -0.01 ** neg 4 3.825 4.175
Age -0.0000920 0.0000638 0.95 -0.0002170 0.0000330 -1.4428806 158 0.1490541 -0.00 neg 3 2.825 3.175
SexFemale -0.0006944 0.0018043 0.95 -0.0042308 0.0028421 -0.3848344 158 0.7003601 -0.00 neg 2 1.825 2.175
RaceNon-White -0.0009411 0.0017796 0.95 -0.0044290 0.0025467 -0.5288664 158 0.5968981 -0.00 neg 1 0.825 1.175
model_growth_rate_cosmic_count_chek2 <- glm(
        formula = growth_rate ~ Trilaciclib + preVAF + CosmicCount + heme_cosmic_count + Cohort + Age + Sex + Race,
        data = df_to_plot %>%
            filter(Gene == "CHEK2") %>%
            filter(Trilaciclib != "Untreated") %>%
            filter(growth_rate != "Inf" & growth_rate != "-Inf")
    ) %>% sjPlot::get_model_data(type = "est") %>%
    mutate(
        term = case_when(
            term == "TrilaciclibPlacebo" ~ "TrilaciclibVSPlacebo",
            term == "CohortTNBC" ~ "SCLCvsTNBC",
            term == "CohortCRC" ~ "SCLCvsCRC",
            TRUE ~ term
        )
    )
model_growth_rate_cosmic_count_chek2
term estimate std.error conf.level conf.low conf.high statistic df.error p.value p.stars p.label group xpos xmin xmax
TrilaciclibVSPlacebo 0.0029152 0.0019429 0.95 -0.0008928 0.0067231 1.5004570 54 0.1334961 0.00 pos 9 8.825 9.175
preVAF -0.0290738 0.0188689 0.95 -0.0660562 0.0079085 -1.5408341 54 0.1233572 -0.03 neg 8 7.825 8.175
CosmicCount -0.0001441 0.0007948 0.95 -0.0017018 0.0014136 -0.1812782 54 0.8561492 -0.00 neg 7 6.825 7.175
heme_cosmic_count -0.0024395 0.0059109 0.95 -0.0140247 0.0091457 -0.4127083 54 0.6798203 -0.00 neg 6 5.825 6.175
SCLCvsTNBC -0.0024415 0.0032384 0.95 -0.0087886 0.0039055 -0.7539356 54 0.4508879 -0.00 neg 5 4.825 5.175
SCLCvsCRC -0.0034162 0.0027355 0.95 -0.0087777 0.0019453 -1.2488384 54 0.2117242 -0.00 neg 4 3.825 4.175
Age 0.0000721 0.0000953 0.95 -0.0001146 0.0002588 0.7566054 54 0.4492863 0.00 pos 3 2.825 3.175
SexFemale -0.0015331 0.0027231 0.95 -0.0068703 0.0038041 -0.5629866 54 0.5734440 -0.00 neg 2 1.825 2.175
RaceNon-White 0.0031610 0.0031163 0.95 -0.0029468 0.0092689 1.0143608 54 0.3104106 0.00 pos 1 0.825 1.175
model_growth_rate_cosmic_count_ddr <- glm(
    formula = growth_rate ~ Trilaciclib + preVAF + CosmicCount + heme_cosmic_count + Cohort + Age + Sex + Race,
    data = df_to_plot %>%
        filter(Gene_Class == "DDR") %>%
        filter(Trilaciclib != "Untreated") %>%
        filter(growth_rate != "Inf" & growth_rate != "-Inf")
) %>%
    sjPlot::get_model_data(type = "est") %>%
    mutate(
        term = case_when(
            term == "TrilaciclibPlacebo" ~ "TrilaciclibVSPlacebo",
            term == "CohortTNBC" ~ "SCLCvsTNBC",
            term == "CohortCRC" ~ "SCLCvsCRC",
            TRUE ~ term
        )
    )
model_growth_rate_cosmic_count_ddr
term estimate std.error conf.level conf.low conf.high statistic df.error p.value p.stars p.label group xpos xmin xmax
TrilaciclibVSPlacebo 0.0039312 0.0008866 0.95 0.0021935 0.0056690 4.4338790 350 0.0000093 *** 0.00 *** pos 9 8.825 9.175
preVAF -0.0369267 0.0167706 0.95 -0.0697965 -0.0040569 -2.2018722 350 0.0276743 * -0.04 * neg 8 7.825 8.175
CosmicCount 0.0000000 0.0000043 0.95 -0.0000085 0.0000085 -0.0049319 350 0.9960650 -0.00 neg 7 6.825 7.175
heme_cosmic_count -0.0000521 0.0000826 0.95 -0.0002140 0.0001097 -0.6314511 350 0.5277456 -0.00 neg 6 5.825 6.175
SCLCvsTNBC -0.0025520 0.0014385 0.95 -0.0053715 0.0002675 -1.7740467 350 0.0760554 -0.00 neg 5 4.825 5.175
SCLCvsCRC -0.0046323 0.0013184 0.95 -0.0072163 -0.0020483 -3.5136434 350 0.0004420 *** -0.00 *** neg 4 3.825 4.175
Age 0.0000476 0.0000408 0.95 -0.0000323 0.0001276 1.1683493 350 0.2426659 0.00 pos 3 2.825 3.175
SexFemale -0.0005090 0.0011997 0.95 -0.0028605 0.0018424 -0.4242895 350 0.6713547 -0.00 neg 2 1.825 2.175
RaceNon-White 0.0016333 0.0012415 0.95 -0.0008001 0.0040667 1.3155584 350 0.1883223 0.00 pos 1 0.825 1.175

22 TP53 Line Plot

g1_tp %>%
    filter(Gene_Class == "DDR", change_in_days < 1000) %>%
    filter(preVAF > 0.01) %>%
    filter(Cohort != "Untreated") %>%
    filter(
        ifelse(
            Cohort == "TNBC",
            duringVAF > 0.01,
            postVAF > 0.01
        )
    ) %>%
    ggplot(
        aes(
            fill = Cohort,
            color = Trilaciclib,
            shape = Cohort
        )
    ) +
    geom_segment(
        aes(
            x = 0,
            y = 0,
            xend = change_in_days,
            yend = change_in_VAF
        ),
        size = 0.5
    ) +
    geom_point(aes(x = change_in_days, y = change_in_VAF)) +
    geom_hline(yintercept = 0, linetype = "dashed", alpha = 0.5) +
    labs(
        x = "Change in Days",
        y = "Change in VAF",
        fill = "Cohort",
    ) +
    panel_theme_basic +
    scale_fill_nejm() +
    scale_color_nejm() +
        scale_y_continuous(
        trans = scales::pseudo_log_trans(0.01),
        limits = \(x) {
              c(-max(abs(x)), max(abs(x)))
        }
    ) +
    theme(
        plot.margin = margin(1, 1, 1, 1, "cm")
    )

g1_tp %>%
    filter(Gene == "TP53") %>%
    filter(
        ifelse(
            Cohort == "TNBC",
            duringVAF > 0.01,
            postVAF > 0.01
        )
    ) %>%
    mutate(
        pre = preVAF,
        post = ifelse(Cohort == "TNBC", duringVAF, postVAF),
    ) %>%
    dplyr::select(pre, post, change_in_days, Cohort, Trilaciclib) %>%
    filter(change_in_days < 500) %>%
    ggplot(aes(fill = Cohort, color = Cohort, shape = Trilaciclib)) +
    geom_segment(
        aes(
            x = 0,
            y = pre,
            xend = change_in_days,
            yend = post
        ),
        size = 0.5
    ) +
    geom_point(
        aes(
            x = change_in_days,
            y = post
        )
    ) +
    geom_point(
        aes(
            x = 0,
            y = pre
        )
    ) +
    labs(
        x = "Change in Days",
        y = "VAF",
        fill = "Cohort",
    ) +
    panel_theme_basic +
    scale_fill_nejm() +
    scale_color_nejm() +
    scale_y_continuous(
        trans = scales::pseudo_log_trans(0.01),
        #limits = \(x) {
        #      c(-max(abs(x)), max(abs(x)))
        #}
    )

23 Age Ribbon Plots - TNBC and CRC

g1_mut_table %>%
    filter(whichdraw == "PreTx") %>%
    dplyr::select(DeID, Age, Trilaciclib, TP53, PPM1D, CHEK2) %>%
    mutate(
        CH_Binary = ifelse(rowSums(across(where(is.integer)), na.rm = TRUE) > 0, 1, 0)
    ) %>%
    group_by(Trilaciclib) %>%
    summarise(n_ddr_ch = sum(CH_Binary), n_total = n())
Trilaciclib n_ddr_ch n_total
Placebo 12 37
Trilaciclib 5 28
Untreated 37 176
g1_mut_table_tnbc %>%
    filter(whichdraw == "PreTx") %>%
    dplyr::select(DeID, Age, Trilaciclib, TP53, PPM1D, CHEK2) %>%
    mutate(
        CH_Binary = ifelse(rowSums(across(where(is.integer)), na.rm = TRUE) > 0, 1, 0)
    ) %>%
    group_by(Trilaciclib) %>%
    summarise(n_ddr_ch = sum(CH_Binary), n_total = n())
Trilaciclib n_ddr_ch n_total
Placebo 1 11
Trilaciclib 5 27
Untreated 37 176
g1_mut_table_crc %>%
    filter(whichdraw == "PreTx") %>%
    dplyr::select(DeID, Age, Trilaciclib, TP53, PPM1D, CHEK2) %>%
    mutate(
        CH_Binary = ifelse(rowSums(across(where(is.integer)), na.rm = TRUE) > 0, 1, 0)
    ) %>%
    group_by(Trilaciclib) %>%
    summarise(n_ddr_ch = sum(CH_Binary), n_total = n())
Trilaciclib n_ddr_ch n_total
Placebo 22 112
Trilaciclib 20 100
Untreated 37 176
g1_mut_table_tnbc %>%
    filter(whichdraw == "PreTx") %>%
    dplyr::select(DeID, Age, Trilaciclib, TP53, PPM1D, CHEK2) %>%
    mutate(
        CH_Binary = ifelse(rowSums(across(where(is.integer)), na.rm = TRUE) > 0, 1, 0),
        Treatment = ifelse(Trilaciclib == "Untreated", "Healthy Control", "Baseline TNBC"),
        Treatment = factor(Treatment, levels = c("Healthy Control", "Baseline TNBC"))
    ) %>%
    ggplot(
        aes(
            x = Age,
            y = CH_Binary,
            fill = Treatment
        )
    ) +
    geom_smooth(
        aes(
            fill = Treatment,
            color = Treatment
        ),
        method = "gam",
        formula = y ~ s(x),
        method.args = list(family = "binomial"),
        size = 1.5,
        se = TRUE,
        alpha = 0.1
    ) +
    scale_y_continuous(labels = scales::label_percent(accuracy = 1L), expand = expansion(add = c(0.01, 0.01))) +
    scale_x_continuous(breaks = seq(0, 90, by = 5)) + 
    labs(x = "Age", y = "Frequency (%)") +
    panel_theme_basic +
    scale_fill_nejm() +
    scale_color_nejm() +
    #scale_fill_manual(values = treatment_colors) +
    #scale_color_manual(values = treatment_colors) +
    theme(
        plot.margin = margin(0, 1, 1, 1, "cm")
    )

g1_mut_table_crc %>%
    filter(whichdraw == "PreTx", Age >= 45) %>%
    mutate(
        CH_Binary = ifelse(rowSums(across(where(is.integer)), na.rm = TRUE) > 0, 1, 0),
        Treatment = ifelse(Trilaciclib == "Untreated", "Healthy Control", "Baseline CRC"),
        Treatment = factor(Treatment, levels = c("Healthy Control", "Baseline CRC"))
    ) %>%
    ggplot(
        aes(
            x = Age,
            y = CH_Binary,
            fill = Treatment
        )
    ) +
    geom_smooth(
        aes(
            fill = Treatment,
            color = Treatment
        ),
        method = "gam",
        formula = y ~ s(x),
        method.args = list(family = "binomial"),
        size = 1.5,
        se = TRUE,
        alpha = 0.1
    ) +
    scale_y_continuous(labels = scales::label_percent(accuracy = 1L), expand = expansion(add = c(0.01, 0.01))) +
    scale_x_continuous(breaks = seq(0, 90, by = 5)) + 
    labs(x = "Age", y = "Frequency (%)") +
    panel_theme_basic +
    scale_fill_nejm() +
    scale_color_nejm() +
    #scale_fill_manual(values = treatment_colors) +
    #scale_color_manual(values = treatment_colors) +
    theme(
        plot.margin = margin(0, 1, 1, 1, "cm")
    )

24 Mutation Distribution Plot

g1_df %>% filter(Cohort == "TNBC" | Cohort == "Untreated") %>%
    filter(whichdraw == "PreTx") %>%
    mutate(
        vaf_bin = case_when(
            average_AF >= 0.002 & average_AF < 0.01 ~ "0.2-1% VAF", 
            average_AF >= 0.01 & average_AF < 0.05 ~ "1-5% VAF",
            average_AF >= 0.05 ~ ">=5% VAF",
            average_AF >= 0.001 & average_AF < 0.002 ~ "0.1-0.2% VAF"
        )
    ) %>%
    group_by(Gene, vaf_bin) %>%
    summarise(n = n()) %>%
    group_by(Gene) %>%
    mutate(
        total = sum(n),
        percentage = (n / total) * 100,
        #vaf_bin = factor(vaf_bin, levels = c("0.1-0.2% VAF", "0.2-1% VAF", "1-5% VAF", ">=5% VAF")),
        vaf_bin = factor(vaf_bin, levels = c(">=5% VAF", "1-5% VAF", "0.2-1% VAF", "0.1-0.2% VAF")),
        Gene = factor(Gene, levels = c(gene_list))
    ) %>%
    ggplot(
        aes(
            x = Gene,
            y = percentage,
            fill = vaf_bin
        )
    ) +
    geom_bar(stat = "identity") +
    labs(
        x = "Gene",
        y = "Percentage of Mutations at Baseline",
        fill = "VAF Bin"
    ) +
    panel_theme_basic +
    theme(
        axis.text.x = element_text(angle = 45, hjust = 1),
        #axis.text.y = element_text(angle = 90, hjust = 1)
        plot.margin = margin(2, 1, 1, 1, "cm")
    ) +
    scale_fill_nejm()
## `summarise()` has grouped output by 'Gene'. You can override using the
## `.groups` argument.

g1_df %>% filter(Cohort == "CRC" | Cohort == "Untreated") %>%
    filter(whichdraw == "PreTx") %>%
    mutate(
        vaf_bin = case_when(
            average_AF >= 0.002 & average_AF < 0.01 ~ "0.2-1% VAF", 
            average_AF >= 0.01 & average_AF < 0.05 ~ "1-5% VAF",
            average_AF >= 0.05 ~ ">=5% VAF",
            average_AF >= 0.001 & average_AF < 0.002 ~ "0.1-0.2% VAF"
        )
    ) %>%
    group_by(Gene, vaf_bin) %>%
    summarise(n = n()) %>%
    group_by(Gene) %>%
    mutate(
        total = sum(n),
        percentage = (n / total) * 100,
        #vaf_bin = factor(vaf_bin, levels = c("0.1-0.2% VAF", "0.2-1% VAF", "1-5% VAF", ">=5% VAF")),
        vaf_bin = factor(vaf_bin, levels = c(">=5% VAF", "1-5% VAF", "0.2-1% VAF", "0.1-0.2% VAF")),
        Gene = factor(Gene, levels = c(gene_list))
    ) %>%
    ggplot(
        aes(
            x = Gene,
            y = percentage,
            fill = vaf_bin
        )
    ) +
    geom_bar(stat = "identity") +
    labs(
        x = "Gene",
        y = "Percentage of Mutations at Baseline",
        fill = "VAF Bin"
    ) +
    panel_theme_basic +
    theme(
        axis.text.x = element_text(angle = 45, hjust = 1),
        #axis.text.y = element_text(angle = 90, hjust = 1)
        plot.margin = margin(2, 1, 1, 1, "cm")
    ) +
    scale_fill_nejm()
## `summarise()` has grouped output by 'Gene'. You can override using the
## `.groups` argument.

25 Mutation Proportion Plot

# TNBC

g1_mut_table_tnbc %>%
    #filter(ifelse(Trilaciclib != "Untreated", whichdraw == "MidTx", whichdraw == "PostTx")) %>%
    filter(whichdraw == "PreTx") %>%
    mutate(Treatment = factor(Trilaciclib, levels = c("Untreated", "Placebo", "Trilaciclib"))) %>%
    group_by(Treatment) %>%
    dplyr::select(-Age, -Trilaciclib, -whichdraw) %>%
    summarise_if(is.numeric, sum, na.rm = TRUE) %>%
    pivot_longer(cols = -c(Treatment), names_to = "Gene", values_to = "n") %>%
    group_by(Treatment, Gene) %>%
    left_join(
        g1_mut_table_tnbc %>%
            #filter(ifelse(Trilaciclib != "Untreated", whichdraw == "MidTx", whichdraw == "PostTx")) %>%
            filter(whichdraw == "PreTx") %>%
            mutate(Treatment = factor(Trilaciclib, levels = c("Untreated", "Placebo", "Trilaciclib"))) %>%
            group_by(Treatment) %>%
            summarise(total_patients = n())
    ) %>%
    mutate(
        Proportion = n / total_patients,
        Gene = factor(Gene, levels = c(gene_list)),
        Treatment = factor(Treatment, levels = c("Untreated", "Placebo", "Trilaciclib"))
        #Trilaciclib = factor(Trilaciclib, levels = c("Untreated", "Placebo", "Trilaciclib"))
    ) %>%
    ggplot(
        aes(
        x = Gene,
        y = Proportion,
        fill = Treatment
        )
    ) +
    geom_bar(color = "black", stat = "identity", position = position_dodge()) +
    labs(
        x = "",
        y = "Percent of Patients\nwith Mutated Gene\n Post-Treatment",
        fill = "Treatment"
    ) +
    panel_theme_basic +
    theme(
        axis.text.x = element_text(angle = 45, hjust = 1),
        legend.position = c(0.8, 0.6),
        legend.direction = "vertical",
        plot.margin = margin(2, 0, 1, 1, "cm")
    ) +
    scale_y_continuous(labels = scales::percent, limits = c(NA, 0.6), expand = c(0, 0)) +
    scale_fill_nejm() +
    scale_color_nejm()
## Joining with `by = join_by(Treatment)`

# CRC

g1_mut_table_crc %>%
    #filter(whichdraw == "PostTx") %>%
    filter(whichdraw == "PreTx") %>%
    mutate(Treatment = factor(Trilaciclib, levels = c("Untreated", "Placebo", "Trilaciclib"))) %>%
    group_by(Treatment) %>%
    dplyr::select(-Age, -Trilaciclib, -whichdraw) %>%
    summarise_if(is.numeric, sum, na.rm = TRUE) %>%
    pivot_longer(cols = -c(Treatment), names_to = "Gene", values_to = "n") %>%
    group_by(Treatment, Gene) %>%
    left_join(
        g1_mut_table_crc %>%
            #filter(whichdraw == "PostTx") %>%
            filter(whichdraw == "PreTx") %>%
            mutate(Treatment = factor(Trilaciclib, levels = c("Untreated", "Placebo", "Trilaciclib"))) %>%
            group_by(Treatment) %>%
            summarise(total_patients = n())
    ) %>%
    mutate(
        Proportion = n / total_patients,
        Gene = factor(Gene, levels = c(gene_list)),
        Treatment = factor(Treatment, levels = c("Untreated", "Placebo", "Trilaciclib"))
        #Trilaciclib = factor(Trilaciclib, levels = c("Untreated", "Placebo", "Trilaciclib"))
    ) %>%
    ggplot(
        aes(
        x = Gene,
        y = Proportion,
        fill = Treatment
        )
    ) +
    geom_bar(color = "black", stat = "identity", position = position_dodge()) +
    labs(
        x = "",
        y = "Percent of Patients\nwith Mutated Gene\n Post-Treatment",
        fill = "Treatment"
    ) +
    panel_theme_basic +
    theme(
        axis.text.x = element_text(angle = 45, hjust = 1),
        legend.position = c(0.8, 0.6),
        legend.direction = "vertical",
        plot.margin = margin(2, 0, 1, 1, "cm")
    ) +
    scale_y_continuous(labels = scales::percent, limits = c(NA, 0.6), expand = c(0, 0)) +
    scale_fill_nejm() +
    scale_color_nejm()
## Joining with `by = join_by(Treatment)`

26 Blood Counts Analysis

g1_tp_tmp <- g1_tp %>%
        filter(
        case_when(
            Cohort == "Untreated" ~ compare_group == "PrevsPost",
            Cohort == "SCLC" ~ compare_group == "C1D1vsC5D1",
            Cohort == "CRC" ~ compare_group == "C1D1vsMaintenance",
            Cohort == "TNBC" ~ compare_group == "C1D1vsC7D1",
            TRUE ~ TRUE
        )
    ) %>%
    filter(Cohort != "Untreated") %>%
    mutate(Cohort = factor(Cohort, levels = c("SCLC", "TNBC", "CRC"))) %>%
    mutate(
        VAF_1 = preVAF * 100,
        VAF_2 = case_when(
            Cohort == "TNBC" ~ duringVAF * 100,
            TRUE ~ postVAF * 100
        ),
        log_VAF_1 = log(VAF_1),
        log_VAF_2 = log(VAF_2),
    ) %>%
    filter(
        VAF_1 > 0 & VAF_2 > 0
    )

model_change_in_prop_blood_counts <- map_dfr(c("DNMT3A", "TET2", "ASXL1", "TP53", "PPM1D", "CHEK2"), function(gene){
    print(gene)
    glm(
        formula = VAF_2 ~ change_in_prop_BASO + change_in_prop_EOS + change_in_prop_LYM + change_in_prop_MONO + change_in_prop_NEUT + VAF_1,
        family = "gaussian",
        data = g1_tp_tmp %>% filter(Gene == gene)
    ) %>%
    sjPlot::get_model_data(type = "est") %>%
    mutate(Gene = gene)
})
## [1] "DNMT3A"
## [1] "TET2"
## [1] "ASXL1"
## [1] "TP53"
## [1] "PPM1D"
## [1] "CHEK2"
model_change_in_prop_blood_counts_2 <- map_dfr(c("DNMT3A", "TET2", "ASXL1", "TP53", "PPM1D", "CHEK2"), function(gene){
    print(gene)
    glm(
        formula = VAF_2/VAF_1 ~ change_in_prop_MONO + change_in_prop_LYM + change_in_prop_GRAN + Cohort + Trilaciclib,
        #formula = log_VAF_2 - log_VAF_1 ~ Trilaciclib * change_in_days,
        #formula = growth_rate ~ Trilaciclib + change_in_prop_MONO + change_in_prop_LYM + change_in_prop_GRAN,
        data = g1_tp_tmp %>% filter(Gene == gene) #%>% filter(growth_rate != "Inf" & growth_rate != "-Inf")
    ) %>%
    sjPlot::get_model_data(type = "est") %>%
    mutate(Gene = gene)
})
## [1] "DNMT3A"
## [1] "TET2"
## [1] "ASXL1"
## [1] "TP53"
## [1] "PPM1D"
## [1] "CHEK2"
model_change_in_prop_blood_counts_2
term estimate std.error conf.level conf.low conf.high statistic df.error p.value p.stars p.label group xpos xmin xmax Gene
change_in_prop_MONO 1.4934571 9.2798258 0.95 -16.6946672 19.681581 0.1609359 404 0.8721439 1.49 pos 6 5.825 6.175 DNMT3A
change_in_prop_LYM 2.9195801 7.7817706 0.95 -12.3324101 18.171570 0.3751820 404 0.7075251 2.92 pos 5 4.825 5.175 DNMT3A
change_in_prop_GRAN 4.8243007 7.1597783 0.95 -9.2086069 18.857208 0.6738059 404 0.5004347 4.82 pos 4 3.825 4.175 DNMT3A
CohortTNBC 0.2303400 1.3021358 0.95 -2.3217993 2.782479 0.1768940 404 0.8595917 0.23 pos 3 2.825 3.175 DNMT3A
CohortCRC 1.2950453 0.9976450 0.95 -0.6603029 3.250393 1.2981024 404 0.1942522 1.30 pos 2 1.825 2.175 DNMT3A
TrilaciclibPlacebo 0.0226533 0.7063164 0.95 -1.3617014 1.407008 0.0320724 404 0.9744143 0.02 pos 1 0.825 1.175 DNMT3A
change_in_prop_MONO 10.8698045 38.9406258 0.95 -65.4524197 87.192029 0.2791379 147 0.7801390 10.87 pos 6 5.825 6.175 TET2
change_in_prop_LYM -18.6877544 37.4494126 0.95 -92.0872543 54.711745 -0.4990133 147 0.6177700 -18.69 neg 5 4.825 5.175 TET2
change_in_prop_GRAN -5.0619467 36.1955257 0.95 -76.0038734 65.879980 -0.1398501 147 0.8887785 -5.06 neg 4 3.825 4.175 TET2
CohortTNBC -0.2903057 5.8173777 0.95 -11.6921565 11.111545 -0.0499032 147 0.9601995 -0.29 neg 3 2.825 3.175 TET2
CohortCRC 4.5688181 4.8919581 0.95 -5.0192435 14.156880 0.9339447 147 0.3503324 4.57 pos 2 1.825 2.175 TET2
TrilaciclibPlacebo 0.9566802 3.0645785 0.95 -5.0497833 6.963144 0.3121735 147 0.7549087 0.96 pos 1 0.825 1.175 TET2
change_in_prop_MONO 32.6829535 67.8662777 0.95 -100.3325065 165.698414 0.4815787 33 0.6301053 32.68 pos 6 5.825 6.175 ASXL1
change_in_prop_LYM -49.9341114 91.0194476 0.95 -228.3289505 128.460728 -0.5486093 33 0.5832736 -49.93 neg 5 4.825 5.175 ASXL1
change_in_prop_GRAN -0.7639099 93.9469410 0.95 -184.8965306 183.368711 -0.0081313 33 0.9935122 -0.76 neg 4 3.825 4.175 ASXL1
CohortTNBC -4.8439631 22.8901059 0.95 -49.7077463 40.019820 -0.2116182 33 0.8324049 -4.84 neg 3 2.825 3.175 ASXL1
CohortCRC 10.1420710 19.4622936 0.95 -28.0033236 48.287466 0.5211139 33 0.6022875 10.14 pos 2 1.825 2.175 ASXL1
TrilaciclibPlacebo 5.4257347 10.8191084 0.95 -15.7793280 26.630797 0.5014955 33 0.6160224 5.43 pos 1 0.825 1.175 ASXL1
change_in_prop_MONO -4.4459468 19.4346827 0.95 -42.5372250 33.645331 -0.2287635 118 0.8190527 -4.45 neg 6 5.825 6.175 TP53
change_in_prop_LYM -14.3244238 17.8983893 0.95 -49.4046221 20.755775 -0.8003192 118 0.4235259 -14.32 neg 5 4.825 5.175 TP53
change_in_prop_GRAN -7.3212179 16.8697247 0.95 -40.3852707 25.742835 -0.4339856 118 0.6642989 -7.32 neg 4 3.825 4.175 TP53
CohortTNBC 1.2022635 2.0104765 0.95 -2.7381981 5.142725 0.5979993 118 0.5498404 1.20 pos 3 2.825 3.175 TP53
CohortCRC 1.9215089 1.7619594 0.95 -1.5318680 5.374886 1.0905523 118 0.2754699 1.92 pos 2 1.825 2.175 TP53
TrilaciclibPlacebo 2.0939412 1.2192743 0.95 -0.2957925 4.483675 1.7173668 118 0.0859122 2.09 pos 1 0.825 1.175 TP53
change_in_prop_MONO 6.4896568 25.4437413 0.95 -43.3791599 56.358473 0.2550591 151 0.7986775 6.49 pos 6 5.825 6.175 PPM1D
change_in_prop_LYM -15.8941530 31.7932360 0.95 -78.2077504 46.419444 -0.4999225 151 0.6171297 -15.89 neg 5 4.825 5.175 PPM1D
change_in_prop_GRAN -13.2883943 29.4235846 0.95 -70.9575605 44.380772 -0.4516239 151 0.6515399 -13.29 neg 4 3.825 4.175 PPM1D
CohortTNBC 4.2047565 4.7888556 0.95 -5.1812280 13.590741 0.8780295 151 0.3799277 4.20 pos 3 2.825 3.175 PPM1D
CohortCRC 3.4026267 3.6820805 0.95 -3.8141185 10.619372 0.9241044 151 0.3554320 3.40 pos 2 1.825 2.175 PPM1D
TrilaciclibPlacebo 8.0349910 2.7312467 0.95 2.6818459 13.388136 2.9418767 151 0.0032623 ** 8.03 ** pos 1 0.825 1.175 PPM1D
change_in_prop_MONO 20.4603604 52.8303756 0.95 -83.0852730 124.005994 0.3872840 53 0.6985460 20.46 pos 6 5.825 6.175 CHEK2
change_in_prop_LYM -41.9759607 41.9903306 0.95 -124.2754965 40.323575 -0.9996578 53 0.3174762 -41.98 neg 5 4.825 5.175 CHEK2
change_in_prop_GRAN -19.3816157 37.3783051 0.95 -92.6417476 53.878516 -0.5185258 53 0.6040915 -19.38 neg 4 3.825 4.175 CHEK2
CohortTNBC -1.9989926 5.5032248 0.95 -12.7851149 8.787130 -0.3632402 53 0.7164254 -2.00 neg 3 2.825 3.175 CHEK2
CohortCRC 3.2217396 4.1433286 0.95 -4.8990352 11.342514 0.7775728 53 0.4368209 3.22 pos 2 1.825 2.175 CHEK2
TrilaciclibPlacebo 3.6986406 3.2078433 0.95 -2.5886167 9.985898 1.1529992 53 0.2489107 3.70 pos 1 0.825 1.175 CHEK2
model_change_in_prop_blood_counts_3 <- map_dfr(c("DTA", "DDR"), function(gene_class){
    print(gene_class)
    glm(
        formula = VAF_2/VAF_1 ~ change_in_prop_MONO + change_in_prop_LYM + change_in_prop_GRAN + Cohort + Trilaciclib,
        #formula = log_VAF_2 - log_VAF_1 ~ Trilaciclib * change_in_days,
        #formula = growth_rate ~ Trilaciclib + change_in_prop_MONO + change_in_prop_LYM + change_in_prop_GRAN,
        data = g1_tp_tmp %>% filter(Gene_Class == gene_class) #%>% filter(growth_rate != "Inf" & growth_rate != "-Inf")
    ) %>%
    sjPlot::get_model_data(type = "est") %>%
    mutate(Gene_Class = gene_class)
})
## [1] "DTA"
## [1] "DDR"
model_change_in_prop_blood_counts_3
term estimate std.error conf.level conf.low conf.high statistic df.error p.value p.stars p.label group xpos xmin xmax Gene_Class
change_in_prop_MONO 0.9952329 12.769689 0.95 -24.0328969 26.0233627 0.0779371 598 0.9378781 1.00 pos 6 5.825 6.175 DTA
change_in_prop_LYM -6.3158440 11.855610 0.95 -29.5524130 16.9207250 -0.5327304 598 0.5942202 -6.32 neg 5 4.825 5.175 DTA
change_in_prop_GRAN 1.8785401 11.086021 0.95 -19.8496610 23.6067411 0.1694513 598 0.8654417 1.88 pos 4 3.825 4.175 DTA
CohortTNBC 0.4829915 1.993335 0.95 -3.4238739 4.3898569 0.2423032 598 0.8085452 0.48 pos 3 2.825 3.175 DTA
CohortCRC 3.1266624 1.580680 0.95 0.0285868 6.2247381 1.9780491 598 0.0479232 * 3.13 * pos 2 1.825 2.175 DTA
TrilaciclibPlacebo 0.9965148 1.070084 0.95 -1.1008114 3.0938410 0.9312491 598 0.3517247 1.00 pos 1 0.825 1.175 DTA
change_in_prop_MONO -6.7311422 16.612319 0.95 -39.2906882 25.8284038 -0.4051898 336 0.6853380 -6.73 neg 6 5.825 6.175 DDR
change_in_prop_LYM -34.6267325 17.752422 0.95 -69.4208401 0.1673752 -1.9505357 336 0.0511123 -34.63 neg 5 4.825 5.175 DDR
change_in_prop_GRAN -23.7147043 16.479162 0.95 -56.0132679 8.5838593 -1.4390722 336 0.1501301 -23.71 neg 4 3.825 4.175 DDR
CohortTNBC 1.0635118 2.477417 0.95 -3.7921366 5.9191602 0.4292825 336 0.6677177 1.06 pos 3 2.825 3.175 DDR
CohortCRC 2.7879841 2.028004 0.95 -1.1868306 6.7627987 1.3747429 336 0.1692112 2.79 pos 2 1.825 2.175 DDR
TrilaciclibPlacebo 4.7567901 1.471579 0.95 1.8725477 7.6410325 3.2324389 336 0.0012274 ** 4.76 ** pos 1 0.825 1.175 DDR

27 For Reviewer Comments

# Spider Plot with Average Mean Slope
g1_tp %>%
    filter(
        case_when(
            Cohort == "Untreated" ~ compare_group == "PrevsPost",
            Cohort == "SCLC" ~ compare_group == "C1D1vsC5D1",
            Cohort == "CRC" ~ compare_group == "C1D1vsMaintenance",
            Cohort == "TNBC" ~ compare_group == "C1D1vsC7D1",
            TRUE ~ TRUE
        )
    ) %>%
    filter(!is.na(change_in_VAF), !is.na(Gene_Class)) %>%
    mutate(
        slope = change_in_VAF / change_in_days,
        starting_vaf = ifelse(preVAF < 0.02, "<0.02 Starting VAF", ">=0.02 Starting VAF")
    ) %>%
    group_by(Trilaciclib, Gene_Class, starting_vaf) %>%
    summarise(
        mean_slope = mean(slope),
        se_slope = sd(slope) / sqrt(n()),
        .groups = "drop"
    ) %>%
    group_by(Trilaciclib, Gene_Class, starting_vaf) %>%
    do({
        x_seq <- seq(0, max(g1_tp$change_in_days), length.out = 100)
        data.frame(
            x = x_seq,
            y = .$mean_slope * x_seq,
            ymin = (.$mean_slope - .$se_slope) * x_seq,
            ymax = (.$mean_slope + .$se_slope) * x_seq,
            Trilaciclib = .$Trilaciclib
        )
    }) %>%
    ggplot(
        aes(x = x, color = Trilaciclib, fill = Trilaciclib)
    ) +
    geom_ribbon(aes(ymin = ymin, ymax = ymax), alpha = 0.2) +
    geom_line(aes(y = y), size = 1, linetype = "dashed") +
    geom_point(
        data = g1_tp %>%
            filter(!is.na(change_in_VAF), !is.na(Gene_Class)),
        aes(x = change_in_days, y = change_in_VAF),
        alpha = 0.5
    ) +
    geom_segment(
        data = g1_tp %>%
            filter(!is.na(change_in_VAF), !is.na(Gene_Class)),
        aes(
            x = 0,
            y = 0,
            xend = change_in_days,
            yend = change_in_VAF
        ),
        alpha = 0.1
    ) +
    scale_color_nejm() +
    scale_fill_nejm() +
    scale_y_continuous(
        trans = scales::pseudo_log_trans(0.001),
        limits = \(x) {
            c(-max(abs(x)), max(abs(x)))
        }
    ) +
    labs(x = "Change in Days", y = "Change in VAF") +
    facet_wrap(~Gene_Class+starting_vaf, nrow = 2, ncol = 2) +
    panel_theme_basic +
    theme(
        plot.margin = margin(1, 1, 1, 1, "cm")
    )

# Generic Spider Plot
g1_tp %>%
    filter(
        case_when(
            Cohort == "Untreated" ~ compare_group == "PrevsPost",
            Cohort == "SCLC" ~ compare_group == "C1D1vsC5D1",
            Cohort == "CRC" ~ compare_group == "C1D1vsMaintenance",
            Cohort == "TNBC" ~ compare_group == "C1D1vsC7D1",
            TRUE ~ TRUE
        )
    ) %>%
    filter(!is.na(change_in_VAF), Gene_Class == "DDR" | Gene_Class == "DTA") %>%
    ggplot(
        aes(
            x = change_in_days,
            y = change_in_VAF,
            fill = Trilaciclib
        )
    ) +
    geom_segment(
        aes(
            x = change_in_days,
            y = change_in_VAF,
            xend = 0,
            yend = 0,
            color = Trilaciclib
        ),
        size = 0.1,
        alpha = 0.5
    ) +
    geom_point(
        aes(
            x = change_in_days,
            y = change_in_VAF,
            color = Trilaciclib
        ),
        alpha = 0.5
    ) +
    labs(
        x = "",
        y = "Change in VAF",
        fill = "Trilaciclib",
    ) +
    panel_theme_basic +
    scale_fill_nejm() +
    scale_color_nejm() +
    scale_y_continuous(
        trans = scales::pseudo_log_trans(0.001),
        limits = \(x) {
            c(-max(abs(x)), max(abs(x)))
        }
    ) +
    scale_x_sqrt() +
    facet_wrap(~Gene_Class) +
    theme(
        strip.text = element_blank(),
        plot.title = element_text(hjust = 0, vjust = 2, size = 20),
        plot.margin = margin(1, 1, 1, 1, "cm"),
        panel.spacing = unit(2, "lines")
    )

# Spider Plot by Gene with Starting VAF
g1_tp %>%
    filter(
        case_when(
            Cohort == "Untreated" ~ compare_group == "PrevsPost",
            Cohort == "SCLC" ~ compare_group == "C1D1vsC5D1",
            Cohort == "CRC" ~ compare_group == "C1D1vsMaintenance",
            Cohort == "TNBC" ~ compare_group == "C1D1vsC7D1",
            TRUE ~ TRUE
        )
    ) %>%
    filter(Gene_Class == "DDR" | Gene_Class == "DTA") %>%
    ungroup() %>%
    group_by(DeID, Gene) %>%
    mutate(
        change_in_days = mean(change_in_days),
        change_in_VAF = mean(change_in_VAF),
        preVAF = mean(preVAF)
    ) %>%
    ungroup() %>%
    ggplot(
        aes(
            x = change_in_days,
            y = change_in_VAF,
            color = Trilaciclib,
        )
    ) +
    geom_point(
        aes(x = change_in_days, y = change_in_VAF, size = preVAF),
        alpha = 0.5
    ) +
    geom_segment(
        aes(
            x = 0,
            y = 0,
            xend = change_in_days,
            yend = change_in_VAF
        ),
        alpha = 0.1
    ) +
    labs(
        x = "Change in Days",
        y = "Change in VAF",
        color = "Trilaciclib"
    ) +
    panel_theme_basic +
    scale_fill_nejm() +
    scale_color_nejm() +
    scale_y_continuous(
        trans = scales::pseudo_log_trans(0.001),
        limits = \(x) {
            c(-max(abs(x)), max(abs(x)))
        },
        labels = scales::percent_format(accuracy = 0.1)
    ) +
        theme(
            plot.margin = margin(1, 1, 1, 1, "cm"),
            # legend.position = "bottom"
        ) +
        facet_wrap(~Gene)

# Taking the Largest Clone per Patient per Gene
data_to_plot <- g1_tp %>%
    filter(
        case_when(
            Cohort == "Untreated" ~ compare_group == "PrevsPost",
            Cohort == "SCLC" ~ compare_group == "C1D1vsC5D1",
            Cohort == "CRC" ~ compare_group == "C1D1vsMaintenance",
            Cohort == "TNBC" ~ compare_group == "C1D1vsC7D1",
            TRUE ~ TRUE
        )
    ) %>%
    filter(Gene_Class == "DDR" | Gene_Class == "DTA") %>%
    group_by(DeID, Gene) %>%
    slice_max(abs(change_in_VAF), n = 1) %>%
    ungroup() %>%
    mutate(
        # Setting Alpha Value to indicate how much the VAF has changed
        alpha_val = case_when(
            abs(growth_rate) < 0.01 ~ 0.2,
            abs(growth_rate) >= 0.01 & abs(growth_rate) < 0.05 ~ 0.6,
            abs(growth_rate) >= 0.5 ~ 0.9
        ),
        # Defining the size of the point based on the starting VAF
        starting_vaf = case_when(
            preVAF < 0.02 ~ "<2%",
            preVAF >= 0.02 & preVAF < 0.05 ~ "2-5%",
            preVAF >= 0.05 ~ ">=5%"
        ),
        starting_vaf = factor(starting_vaf, levels = c("<2%", "2-5%", ">=5%"))
    )
data_to_plot %>%
    mutate(
        slope = change_in_VAF / change_in_days
    ) %>%
    group_by(Trilaciclib, Gene_Class) %>%
    summarise(
        mean_slope = mean(slope),
        se_slope = sd(slope) / sqrt(n()),
        .groups = "drop"
    ) %>%
    group_by(Trilaciclib, Gene_Class) %>%
    do({
        x_seq <- seq(0, max(g1_tp$change_in_days), length.out = 100)
        data.frame(
            x = x_seq,
            y = .$mean_slope * x_seq,
            ymin = (.$mean_slope - .$se_slope) * x_seq,
            ymax = (.$mean_slope + .$se_slope) * x_seq,
            Trilaciclib = .$Trilaciclib
        )
    }) %>%
    # ggplot(
    #     aes(
    #         x = change_in_days,
    #         y = change_in_VAF,
    #         color = Trilaciclib,
    #         size = starting_vaf,
    #         alpha = alpha_val
    #     )
    # ) +
    ggplot(
        aes(x = x, color = Trilaciclib, fill = Trilaciclib)
    ) +
    geom_ribbon(aes(ymin = ymin, ymax = ymax), alpha = 0.2) +
    geom_line(aes(y = y), size = 1, linetype = "dashed") +
    geom_point(
        data = data_to_plot,
        aes(x = change_in_days, y = change_in_VAF, size = starting_vaf, alpha = alpha_val),
    ) +
    geom_segment(
        data = data_to_plot,
        aes(
            x = 0,
            y = 0,
            xend = change_in_days,
            yend = change_in_VAF,
            #alpha = alpha_val
        ),
        alpha = 0.1,
        size = 0.5,
    ) +
    scale_alpha_identity() +
    scale_y_continuous(
        trans = scales::pseudo_log_trans(0.001),
        limits = \(x) {
            c(-max(abs(x)), max(abs(x)))
        },
        labels = scales::percent_format(accuracy = 0.1)
    ) +
    #scale_x_log10() +
    labs(
        x = "Change in Days",
        y = "Change in VAF",
        color = "Trilaciclib",
        size = "PreVAF"
    ) +
    panel_theme_basic +
    scale_fill_manual(values = treatment_colors) +
    scale_color_manual(values = treatment_colors) +
    facet_wrap(~Gene_Class + Trilaciclib) +
    theme(
        axis.text.y = element_text(size = 16),
    )
## Warning: Using size for a discrete variable is not advised.

# The idea is that depending on the starting VAF, the change in VAF will be different. This is a way to show that the change in VAF is dependent on the starting VAF.
data_to_plot <- g1_tp %>%
    filter(
        case_when(
            Cohort == "Untreated" ~ compare_group == "PrevsPost",
            Cohort == "SCLC" ~ compare_group == "C1D1vsC5D1",
            Cohort == "CRC" ~ compare_group == "C1D1vsMaintenance",
            Cohort == "TNBC" ~ compare_group == "C1D1vsC7D1",
            TRUE ~ TRUE
        )
    ) %>%
    filter(growth_rate != "Inf" & growth_rate != "-Inf") %>%
    #filter(Gene_Class == "DDR" | Gene_Class == "DTA") %>%
    filter(Gene_Class == "DDR") %>%
    group_by(DeID, Gene) %>%
    slice_max(abs(change_in_VAF), n = 1) %>%
    ungroup() %>%
    mutate(
        starting_clone_vaf = case_when(
            preVAF < 0.02 ~ "<2%",
            preVAF >= 0.02 & preVAF < 0.05 ~ "2-5%",
            preVAF >= 0.05 ~ ">=5%"
        ),
        starting_clone_vaf = factor(starting_clone_vaf, levels = c("<2%", "2-5%", ">=5%")),
        treatment_time = case_when(
            change_in_days < 30 ~ "1 Month",
            change_in_days >= 30 & change_in_days < 90 ~ "3 Months",
            change_in_days >= 90 & change_in_days < 180 ~ "Half a Year",
            change_in_days >= 180 & change_in_days < 360 ~ "A Year",
            change_in_days >= 360 ~ "Over A Year"
        ),
        treatment_time = factor(treatment_time, levels = c("1 Month", "3 Months", "Half a Year", "A Year")),
        alpha_val = case_when(
            abs(growth_rate) < 0.01 ~ 0.3,
            abs(growth_rate) >= 0.01 & abs(growth_rate) < 0.05 ~ 0.5,
            abs(growth_rate) >= 0.05 ~ 1
        )
    ) %>%
    mutate(
        change_in_months = change_in_days / 30,
        change_in_VAF_per_month = change_in_VAF / change_in_months,
        change_in_VAF_per_day = change_in_VAF / change_in_days,
    )
# Statistics
model_change_in_VAF <- rbind(
    glm(
        formula = change_in_VAF_per_month ~ Trilaciclib + Age,
        data = data_to_plot %>%
            filter(Gene_Class == "DDR") %>%
            filter(growth_rate != "Inf" & growth_rate != "-Inf") %>%
            filter(Cohort == "SCLC")
    ) %>% sjPlot::get_model_data(type = "est") %>%
    mutate(Cohort = "SCLC"),
    glm(
        formula = change_in_VAF_per_month ~ Trilaciclib + Age,
        data = data_to_plot %>%
            filter(Gene_Class == "DDR") %>%
            filter(growth_rate != "Inf" & growth_rate != "-Inf") %>%
            filter(Cohort == "TNBC")
    ) %>% sjPlot::get_model_data(type = "est") %>%
    mutate(Cohort = "TNBC"),
    glm(
        formula = change_in_VAF_per_month ~ Trilaciclib + Age,
        data = data_to_plot %>%
            filter(Gene_Class == "DDR") %>%
            filter(growth_rate != "Inf" & growth_rate != "-Inf") %>%
            filter(Cohort == "CRC")
    ) %>% sjPlot::get_model_data(type = "est") %>%
    mutate(Cohort = "CRC")
)
model_change_in_VAF <- model_change_in_VAF %>% filter(term == "TrilaciclibPlacebo")

model_change_in_VAF <- rbind(
    glm(
        formula = change_in_VAF ~ Trilaciclib + Age,
        data = data_to_plot %>%
            filter(Gene_Class == "DDR") %>%
            filter(growth_rate != "Inf" & growth_rate != "-Inf") %>%
            filter(Cohort == "SCLC")
    ) %>% sjPlot::get_model_data(type = "est") %>%
        mutate(Cohort = "SCLC"),
    glm(
        formula = change_in_VAF ~ Trilaciclib + Age,
        data = data_to_plot %>%
            filter(Gene_Class == "DDR") %>%
            filter(growth_rate != "Inf" & growth_rate != "-Inf") %>%
            filter(Cohort == "TNBC")
    ) %>% sjPlot::get_model_data(type = "est") %>%
        mutate(Cohort = "TNBC"),
    glm(
        formula = change_in_VAF ~ Trilaciclib + Age,
        data = data_to_plot %>%
            filter(Gene_Class == "DDR") %>%
            filter(growth_rate != "Inf" & growth_rate != "-Inf") %>%
            filter(Cohort == "CRC")
    ) %>% sjPlot::get_model_data(type = "est") %>%
        mutate(Cohort = "CRC")
)
model_change_in_VAF <- model_change_in_VAF %>% filter(term == "TrilaciclibPlacebo")

data_to_plot %>%
    ggplot(
        aes(
            x = Cohort,
            y = change_in_VAF,
            color = Trilaciclib,
            size = starting_clone_vaf,
            alpha = alpha_val
        )
    ) +
    geom_hline(yintercept = 0, linetype = "dashed", alpha = 0.5) +
    geom_boxplot(position = position_dodge(width = 0.8), alpha = 0.1, size = 1, outlier.shape = NA) +
    geom_point(position = position_jitterdodge(jitter.width=1)) +
    #facet_wrap(~Gene_Class) +
    #geom_point(position = position_dodge(width = 0.8), alpha = 0.3) +
    labs(
        x = "",
        y = "Change in VAF",
        color = "Trilaciclib",
        size = "Starting Clone VAF",
    ) +
    panel_theme_basic +
    #scale_fill_nejm() +
    #scale_color_nejm() +
    scale_fill_manual(values = treatment_colors) +
    scale_color_manual(values = treatment_colors) +
    scale_alpha_identity() +
    scale_y_continuous(
        trans = scales::pseudo_log_trans(0.0001),
        limits = \(x) {c(-max(abs(x)), max(abs(x)))},
        #breaks = scales::extended_breaks(n = 10),
        breaks = c(-0.1, -0.05, -0.01, -0.005, -0.001, 0, 0.001, 0.005, 0.01, 0.05, 0.1),
        labels = scales::percent_format(accuracy = 0.1)
    ) +
    theme(
        legend.position = "right",
        legend.direction = "vertical",
        legend.title = element_text(size = 18),
        legend.text = element_text(size = 18)
    ) +
    guides(
        color = guide_legend(title.position = "top", title = "Drug Treatment"),
        size = guide_legend(title.position = "top")
    ) +
    do.call(geom_signif, default_sig_data(data_to_plot %>% filter(Cohort == "SCLC"), 6, 1.8, 2.2, model_change_in_VAF$p.value[1], model_change_in_VAF$p.stars[1], show_values = TRUE)) +
    do.call(geom_signif, default_sig_data(data_to_plot %>% filter(Cohort == "TNBC"), 7, 2.8, 3.2, model_change_in_VAF$p.value[2], model_change_in_VAF$p.stars[2], show_values = TRUE)) +
    do.call(geom_signif, default_sig_data(data_to_plot %>% filter(Cohort == "CRC"), 8, 3.8, 4.2, model_change_in_VAF$p.value[3], model_change_in_VAF$p.stars[3], show_values = TRUE))
## Warning in (function (mapping = NULL, data = NULL, stat = "signif", position =
## "identity", : Ignoring unknown parameters: `fill`
## Warning in (function (mapping = NULL, data = NULL, stat = "signif", position =
## "identity", : Ignoring unknown parameters: `fill`
## Warning in (function (mapping = NULL, data = NULL, stat = "signif", position =
## "identity", : Ignoring unknown parameters: `fill`
## Warning: Using size for a discrete variable is not advised.
## Warning: The following aesthetics were dropped during statistical transformation: alpha.
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
##   the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
##   variable into a factor?
## The following aesthetics were dropped during statistical transformation: alpha.
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
##   the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
##   variable into a factor?
## The following aesthetics were dropped during statistical transformation: alpha.
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
##   the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
##   variable into a factor?
## Warning: No shared levels found between `names(values)` of the manual scale and the
## data's fill values.

# Density plot of preVAF
g1_tp %>%
    ggplot(
        aes(
            x = growth_rate,
            fill = Trilaciclib
        )
    ) +
    geom_density(alpha = 0.5) +
    scale_x_log10() +
    labs(
        x = "PreVAF",
        fill = "Trilaciclib"
    ) +
    panel_theme_basic +
    scale_fill_nejm() +
    theme(
        plot.margin = margin(1, 1, 1, 1, "cm")
    )
## Warning in transformation$transform(x): NaNs produced
## Warning in scale_x_log10(): log-10 transformation introduced infinite values.
## Warning: Removed 703 rows containing non-finite outside the scale range
## (`stat_density()`).

supp_table_3 <- g1_tp %>%
    dplyr::select(DeID, Cohort, Trilaciclib, Gene, key, prepreVAF, preVAF, duringVAF, postVAF, postpostVAF, compare_group, change_in_days) %>%
    mutate(
        Timepoint_1_VAF = preVAF,
        Timepoint_2_VAF = case_when(
            compare_group == "PrevsPost" ~ postVAF,
            compare_group == "C1D1vsC5D1" ~ postVAF,
            compare_group == "C1D1vs90DPost-Maintenance" ~ postpostVAF,
            compare_group == "C1D1vsC7D1" ~ duringVAF,
            compare_group == "C1D1vsMaintenance" ~ postVAF
        ),
        change_in_days = round(change_in_days)
    ) %>%
    dplyr::select(-c(prepreVAF, preVAF, duringVAF, postVAF, postpostVAF, compare_group))

#fwrite(supp_table_3, "supp_table_3.csv")